diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
index 5e5fd186bea..6432bf0e529 100644
--- a/.github/workflows/build.yml
+++ b/.github/workflows/build.yml
@@ -40,7 +40,7 @@ jobs:
        python -m pip install --upgrade pip wheel
        pip install -e .
        python -c "import autogen"
-        pip install -e.[mathchat,retrievechat,test] datasets pytest
+        pip install -e. pytest
        pip uninstall -y openai
    - name: Test with pytest
      if: matrix.python-version != '3.10'
@@ -49,7 +49,8 @@
    - name: Coverage
      if: matrix.python-version == '3.10'
      run: |
-        pip install coverage
+        pip install -e .[mathchat,test]
+        pip uninstall -y openai
        coverage run -a -m pytest test
        coverage xml
    - name: Upload coverage to Codecov
diff --git a/.github/workflows/contrib-openai.yml b/.github/workflows/contrib-openai.yml
new file mode 100644
index 00000000000..2f250071639
--- /dev/null
+++ b/.github/workflows/contrib-openai.yml
@@ -0,0 +1,58 @@
+# This workflow will install Python dependencies and run tests
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: OpenAI4ContribTests
+
+on:
+  pull_request_target:
+    branches: ['main']
+    paths:
+      - 'autogen/**'
+      - 'test/agentchat/contrib/**'
+      - '.github/workflows/contrib-openai.yml'
+      - 'setup.py'
+
+jobs:
+  RetrieveChatTest:
+    strategy:
+      matrix:
+        os: [ubuntu-latest]
+        python-version: ["3.10"]
+    runs-on: ${{ matrix.os }}
+    environment: openai1
+    steps:
+      # checkout to pr branch
+      - name: Checkout
+        uses: actions/checkout@v3
+        with:
+          ref: ${{ github.event.pull_request.head.sha }}
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install packages and dependencies
+        run: |
+          docker --version
+          python -m pip install --upgrade pip wheel
+          pip install -e .
+          python -c "import autogen"
+          pip install coverage pytest-asyncio
+      - name: Install packages for test when needed
+        run: |
+          pip install docker
+          pip install qdrant_client[fastembed]
+          pip install -e .[retrievechat]
+      - name: Coverage
+        env:
+          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
+          AZURE_OPENAI_API_KEY: ${{ secrets.AZURE_OPENAI_API_KEY }}
+          AZURE_OPENAI_API_BASE: ${{ secrets.AZURE_OPENAI_API_BASE }}
+          OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
+        run: |
+          coverage run -a -m pytest test/agentchat/contrib/test_retrievechat.py test/agentchat/contrib/test_qdrant_retrievechat.py
+          coverage xml
+      - name: Upload coverage to Codecov
+        uses: codecov/codecov-action@v3
+        with:
+          file: ./coverage.xml
+          flags: unittests
diff --git a/.github/workflows/contrib-tests.yml b/.github/workflows/contrib-tests.yml
new file mode 100644
index 00000000000..75381892658
--- /dev/null
+++ b/.github/workflows/contrib-tests.yml
@@ -0,0 +1,59 @@
+# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
+
+name: ContribTests
+
+on:
+  pull_request:
+    branches: ['main', 'dev/v0.2']
+    paths:
+      - 'autogen/**'
+      - 'test/agentchat/contrib/**'
+      - '.github/workflows/contrib-tests.yml'
+      - 'setup.py'
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref }}-${{ github.head_ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+jobs:
+  RetrieveChatTest:
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, macos-latest, windows-2019]
+        python-version: ["3.8", "3.9", "3.10", "3.11"]
+    steps:
+      - uses: actions/checkout@v3
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v4
+        with:
+          python-version: ${{ matrix.python-version }}
+      - name: Install packages and dependencies for all tests
+        run: |
+          python -m pip install --upgrade pip wheel
+          pip install pytest
+      - name: Install qdrant_client when python-version is 3.10 or 3.8
+        if: matrix.python-version == '3.10' || matrix.python-version == '3.8'
+        run: |
+          pip install qdrant_client[fastembed]
+      - name: Install packages and dependencies for RetrieveChat
+        run: |
+          pip install -e .[retrievechat]
+          pip uninstall -y openai
+      - name: Test RetrieveChat
+        run: |
+          pytest test/test_retrieve_utils.py test/agentchat/contrib/test_retrievechat.py test/agentchat/contrib/test_qdrant_retrievechat.py
+      - name: Coverage
+        if: matrix.python-version == '3.10'
+        run: |
+          pip install "coverage>=5.3"
+          coverage run -a -m pytest test/test_retrieve_utils.py test/agentchat/contrib
+          coverage xml
+      - name: Upload coverage to Codecov
+        if: matrix.python-version == '3.10'
+        uses: codecov/codecov-action@v3
+        with:
+          file: ./coverage.xml
+          flags: unittests
diff --git a/.github/workflows/openai.yml b/.github/workflows/openai.yml
index 5c334eec3b0..6f966337bed 100644
--- a/.github/workflows/openai.yml
+++ b/.github/workflows/openai.yml
@@ -1,4 +1,4 @@
-# This workflow will install Python dependencies, run tests and lint with a variety of Python versions
+# This workflow will install Python dependencies and run tests with a variety of Python versions
 # For more information see: https://help.github.com/actions/language-and-framework-guides/using-python-with-github-actions
 
 name: OpenAI
@@ -11,9 +11,6 @@ on:
      - 'test/**'
      - 'notebook/agentchat_auto_feedback_from_code_execution.ipynb'
      - 'notebook/agentchat_function_call.ipynb'
-      - 'notebook/agentchat_MathChat.ipynb'
-      - 'notebook/oai_completion.ipynb'
-      - 'notebook/oai_chatgpt_gpt4.ipynb'
      - '.github/workflows/openai.yml'
 
 jobs:
@@ -23,7 +20,7 @@
        os: [ubuntu-latest]
        python-version: ["3.9", "3.10", "3.11"]
    runs-on: ${{ matrix.os }}
-    environment: openai
+    environment: openai1
    steps:
      # checkout to pr branch
      - name: Checkout
@@ -38,28 +35,17 @@
        run: |
          docker --version
          python -m pip install --upgrade pip wheel
-          pip install -e.[blendsearch]
+          pip install -e.
          python -c "import autogen"
-          pip install coverage pytest-asyncio datasets
+          pip install coverage pytest-asyncio
      - name: Install packages for test when needed
        if: matrix.python-version == '3.9'
        run: |
          pip install docker
-      - name: Install packages for MathChat when needed
-        if: matrix.python-version != '3.11'
+      - name: Install dependencies for test when needed
+        if: matrix.python-version == '3.10'  # test_agentchat_function_call
        run: |
-          pip install -e .[mathchat]
-      - name: Install packages for RetrieveChat when needed
-        if: matrix.python-version == '3.9'
-        run: |
-          pip install -e .[retrievechat]
-      - name: Install packages for Teachable when needed
-        run: |
-          pip install -e .[teachable]
-      - name: Install packages for RetrieveChat with QDrant when needed
-        if: matrix.python-version == '3.11'
-        run: |
-          pip install -e .[retrievechat] qdrant_client[fastembed]
+          pip install -e.[mathchat]
      - name: Coverage
        if: matrix.python-version == '3.9'
        env:
@@ -80,8 +66,7 @@
          OAI_CONFIG_LIST: ${{ secrets.OAI_CONFIG_LIST }}
        run: |
          pip install nbconvert nbformat ipykernel
-          coverage run -a -m pytest test/agentchat/test_qdrant_retrievechat.py
-          coverage run -a -m pytest test/test_with_openai.py
+          coverage run -a -m pytest test/agentchat/test_function_call_groupchat.py
          coverage run -a -m pytest test/test_notebook.py
          coverage xml
          cat "$(pwd)/test/executed_openai_notebook_output.txt"
diff --git a/OAI_CONFIG_LIST_sample b/OAI_CONFIG_LIST_sample
index 01608aeeef8..c3071921118 100644
--- a/OAI_CONFIG_LIST_sample
+++ b/OAI_CONFIG_LIST_sample
@@ -7,14 +7,14 @@
     {
         "model": "gpt-4",
         "api_key": "",
-        "api_base": "",
+        "base_url": "",
         "api_type": "azure",
         "api_version": "2023-07-01-preview"
     },
     {
         "model": "gpt-3.5-turbo",
         "api_key": "",
-        "api_base": "",
+        "base_url": "",
         "api_type": "azure",
         "api_version": "2023-07-01-preview"
     }
diff --git a/README.md b/README.md
index 3cfcb7ff1eb..3a53475554e 100644
--- a/README.md
+++ b/README.md
@@ -28,11 +28,11 @@ AutoGen is a framework that enables the development of LLM applications using mu
 ![AutoGen Overview](https://github.com/microsoft/autogen/blob/main/website/static/img/autogen_agentchat.png)
 
-- AutoGen enables building next-gen LLM applications based on **multi-agent conversations** with minimal effort. It simplifies the orchestration, automation, and optimization of a complex LLM workflow. It maximizes the performance of LLM models and overcomes their weaknesses.
-- It supports **diverse conversation patterns** for complex workflows. With customizable and conversable agents, developers can use AutoGen to build a wide range of conversation patterns concerning conversation autonomy,
+- AutoGen enables building next-gen LLM applications based on [multi-agent conversations](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat) with minimal effort. It simplifies the orchestration, automation, and optimization of a complex LLM workflow. It maximizes the performance of LLM models and overcomes their weaknesses.
+- It supports [diverse conversation patterns](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#supporting-diverse-conversation-patterns) for complex workflows. With customizable and conversable agents, developers can use AutoGen to build a wide range of conversation patterns concerning conversation autonomy,
   the number of agents, and agent conversation topology.
-- It provides a collection of working systems with different complexities. These systems span a **wide range of applications** from various domains and complexities. This demonstrates how AutoGen can easily support diverse conversation patterns.
-- AutoGen provides **enhanced LLM inference**. It offers easy performance tuning, plus utilities like API unification and caching, and advanced usage patterns, such as error handling, multi-config inference, context programming, etc.
+- It provides a collection of working systems with different complexities. These systems span a [wide range of applications](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#diverse-applications-implemented-with-autogen) from various domains. This demonstrates how AutoGen can easily support diverse conversation patterns.
+- AutoGen provides [enhanced LLM inference](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification). It offers utilities like API unification and caching, and advanced usage patterns, such as error handling, multi-config inference, context programming, etc.
 
 AutoGen is powered by collaborative [research studies](https://microsoft.github.io/autogen/docs/Research) from Microsoft, Penn State University, and the University of Washington.
@@ -42,14 +42,14 @@ The easiest way to start playing is
 
 [![Open in GitHub Codespaces](https://github.com/codespaces/badge.svg)](https://codespaces.new/microsoft/autogen?quickstart=1)
 
- 2. Copy OAI_CONFIG_LIST_sample to /notebook folder, name to OAI_CONFIG_LIST, and set the correct configuration.
+ 2. Copy OAI_CONFIG_LIST_sample to the ./notebook folder, rename it to OAI_CONFIG_LIST, and set the correct configuration.
 3. Start playing with the notebooks!
 
 ## Installation
 
-AutoGen requires **Python version >= 3.8**. It can be installed from pip:
+AutoGen requires **Python version >= 3.8, < 3.12**. It can be installed from pip:
 
 ```bash
 pip install pyautogen
@@ -72,7 +72,7 @@ For LLM inference configurations, check the [FAQs](https://microsoft.github.io/a
 
 ## Multi-Agent Conversation Framework
 
-Autogen enables the next-gen LLM applications with a generic multi-agent conversation framework. It offers customizable and conversable agents that integrate LLMs, tools, and humans.
+AutoGen enables next-gen LLM applications with a generic [multi-agent conversation](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat) framework. It offers customizable and conversable agents that integrate LLMs, tools, and humans.
 By automating chat among multiple capable agents, one can easily make them collectively perform tasks autonomously or with human feedback, including tasks that require using tools via code.
 
 Features of this use case include:
@@ -106,14 +106,16 @@ After the repo is cloned.
 The figure below shows an example conversation flow with AutoGen.
 ![Agent Chat Example](https://github.com/microsoft/autogen/blob/main/website/static/img/chat_example.png)
 
-Please find more [code examples](https://microsoft.github.io/autogen/docs/Examples/AutoGen-AgentChat) for this feature.
+Please find more [code examples](https://microsoft.github.io/autogen/docs/Examples/AgentChat) for this feature.
 
 ## Enhanced LLM Inferences
 
-Autogen also helps maximize the utility out of the expensive LLMs such as ChatGPT and GPT-4. It offers enhanced LLM inference with powerful functionalities like tuning, caching, error handling, and templating. For example, you can optimize generations by LLM with your own tuning data, success metrics, and budgets.
+AutoGen also helps maximize the utility of expensive LLMs such as ChatGPT and GPT-4. It offers [enhanced LLM inference](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification) with powerful functionalities like caching, error handling, multi-config inference, and templating.
+
+
 
 ## Documentation
diff --git a/autogen/agentchat/assistant_agent.py b/autogen/agentchat/assistant_agent.py
index cac322774ba..4a0200fb672 100644
--- a/autogen/agentchat/assistant_agent.py
+++ b/autogen/agentchat/assistant_agent.py
@@ -43,7 +43,7 @@ def __init__(
            system_message (str): system message for the ChatCompletion inference.
                Please override this attribute if you want to reprogram the agent.
            llm_config (dict): llm inference configuration.
-                Please refer to [Completion.create](/docs/reference/oai/completion#create)
+                Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
                for available options.
            is_termination_msg (function): a function that takes a message in the form of a dictionary
                and returns a boolean value indicating if this received message is a termination message.
diff --git a/autogen/agentchat/contrib/teachable_agent.py b/autogen/agentchat/contrib/teachable_agent.py
index 8db5b699ea2..29d7f197ffc 100644
--- a/autogen/agentchat/contrib/teachable_agent.py
+++ b/autogen/agentchat/contrib/teachable_agent.py
@@ -18,7 +18,7 @@ def colored(x, *args, **kwargs):
 
 
 class TeachableAgent(ConversableAgent):
-    """Teachable Agent, a subclass of ConversableAgent using a vector database to remember user teachings.
+    """(Experimental) Teachable Agent, a subclass of ConversableAgent using a vector database to remember user teachings.
    In this class, the term 'user' refers to any caller (human or not) sending messages to this agent.
    Not yet tested in the group-chat setting."""
 
@@ -40,7 +40,7 @@ def __init__(
            system_message (str): system message for the ChatCompletion inference.
            human_input_mode (str): This agent should NEVER prompt the human for input.
            llm_config (dict or False): llm inference configuration.
-                Please refer to [Completion.create](/docs/reference/oai/completion#create)
+                Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
                for available options.
                To disable llm-based auto reply, set to False.
            analyzer_llm_config (dict or False): llm inference configuration passed to TextAnalyzerAgent.
@@ -125,11 +125,8 @@ def _generate_teachable_assistant_reply(
            messages = messages.copy()
            messages[-1]["content"] = new_user_text
 
-        # Generate a response.
-        msgs = self._oai_system_message + messages
-        response = oai.ChatCompletion.create(messages=msgs, **self.llm_config)
-        response_text = oai.ChatCompletion.extract_text_or_function_call(response)[0]
-        return True, response_text
+        # Generate a response by reusing existing generate_oai_reply
+        return self.generate_oai_reply(messages, sender, config)
 
    def learn_from_user_feedback(self):
        """Reviews the user comments from the last chat, and decides what teachings to store as memos."""
@@ -265,12 +262,14 @@ def analyze(self, text_to_analyze, analysis_instructions):
            self.send(recipient=self.analyzer, message=analysis_instructions, request_reply=True)  # Request the reply.
            return self.last_message(self.analyzer)["content"]
        else:
+            # TODO: This is not an encouraged usage pattern. It breaks the conversation-centric design.
+            # Consider using the arg "silent".
            # Use the analyzer's method directly, to leave analyzer message out of the printed chat.
            return self.analyzer.analyze_text(text_to_analyze, analysis_instructions)
 
 
 class MemoStore:
-    """
+    """(Experimental)
    Provides memory storage and retrieval for a TeachableAgent, using a vector database.
    Each DB entry (called a memo) is a pair of strings: an input text and an output text.
    The input text might be a question, or a task to perform.
diff --git a/autogen/agentchat/contrib/text_analyzer_agent.py b/autogen/agentchat/contrib/text_analyzer_agent.py
index 8cf88eba6ae..cfc5bc174b3 100644
--- a/autogen/agentchat/contrib/text_analyzer_agent.py
+++ b/autogen/agentchat/contrib/text_analyzer_agent.py
@@ -10,7 +10,7 @@
 
 
 class TextAnalyzerAgent(ConversableAgent):
-    """Text Analysis agent, a subclass of ConversableAgent designed to analyze text as instructed."""
+    """(Experimental) Text Analysis agent, a subclass of ConversableAgent designed to analyze text as instructed."""
 
    def __init__(
        self,
@@ -26,7 +26,7 @@ def __init__(
            system_message (str): system message for the ChatCompletion inference.
            human_input_mode (str): This agent should NEVER prompt the human for input.
            llm_config (dict or False): llm inference configuration.
-                Please refer to [Completion.create](/docs/reference/oai/completion#create)
+                Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
                for available options.
                To disable llm-based auto reply, set to False.
            teach_config (dict or None): Additional parameters used by TeachableAgent.
@@ -74,9 +74,5 @@ def analyze_text(self, text_to_analyze, analysis_instructions):
        msg_text = "\n".join(
            [analysis_instructions, text_to_analyze, analysis_instructions]
        )  # Repeat the instructions.
-        messages = self._oai_system_message + [{"role": "user", "content": msg_text}]
-
        # Generate and return the analysis string.
-        response = oai.ChatCompletion.create(context=None, messages=messages, **self.llm_config)
-        output_text = oai.ChatCompletion.extract_text_or_function_call(response)[0]
-        return output_text
+        return self.generate_oai_reply([{"role": "user", "content": msg_text}], None, None)[1]
diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py
index 017ba4e848a..157e11c39be 100644
--- a/autogen/agentchat/conversable_agent.py
+++ b/autogen/agentchat/conversable_agent.py
@@ -4,7 +4,7 @@
 import json
 import logging
 from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union
-from autogen import oai
+from autogen import OpenAIWrapper
 from .agent import Agent
 from autogen.code_utils import (
    DEFAULT_MODEL,
@@ -93,7 +93,7 @@ def __init__(
                - timeout (Optional, int): The maximum execution time in seconds.
                - last_n_messages (Experimental, Optional, int): The number of messages to look back for code execution. Default to 1.
            llm_config (dict or False): llm inference configuration.
-                Please refer to [Completion.create](/docs/reference/oai/completion#create)
+                Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
                for available options.
                To disable llm-based auto reply, set to False.
            default_auto_reply (str or dict or None): default auto reply when no code execution or llm-based reply is generated.
@@ -107,10 +107,12 @@ def __init__(
        )
        if llm_config is False:
            self.llm_config = False
+            self.client = None
        else:
            self.llm_config = self.DEFAULT_CONFIG.copy()
            if isinstance(llm_config, dict):
                self.llm_config.update(llm_config)
+            self.client = OpenAIWrapper(**self.llm_config)
 
        self._code_execution_config = {} if code_execution_config is None else code_execution_config
        self.human_input_mode = human_input_mode
@@ -255,8 +257,10 @@ def _message_to_dict(message: Union[Dict, str]):
        """
        if isinstance(message, str):
            return {"content": message}
-        else:
+        elif isinstance(message, dict):
            return message
+        else:
+            return dict(message)
 
    def _append_oai_message(self, message: Union[Dict, str], role, conversation_id: Agent) -> bool:
        """Append a message to the ChatCompletion conversation.
@@ -286,6 +290,7 @@ def _append_oai_message(self, message: Union[Dict, str], role, conversation_id:
            oai_message["role"] = "function" if message.get("role") == "function" else role
            if "function_call" in oai_message:
                oai_message["role"] = "assistant"  # only messages with role 'assistant' can have a function call.
+                oai_message["function_call"] = dict(oai_message["function_call"])
        self._oai_messages[conversation_id].append(oai_message)
        return True
 
@@ -307,7 +312,7 @@ def send(
                - role (str): the role of the message, any role that is not "function"
                    will be modified to "assistant".
                - context (dict): the context of the message, which will be passed to
-                    [Completion.create](../oai/Completion#create).
+                    [OpenAIWrapper.create](../oai/client#create).
                    For example, one agent can send a message A as:
        ```python
        {
@@ -356,7 +361,7 @@ async def a_send(
                - role (str): the role of the message, any role that is not "function"
                    will be modified to "assistant".
                - context (dict): the context of the message, which will be passed to
-                    [Completion.create](../oai/Completion#create).
+                    [OpenAIWrapper.create](../oai/client#create).
                    For example, one agent can send a message A as:
        ```python
        {
@@ -399,18 +404,21 @@ def _print_received_message(self, message: Union[Dict, str], sender: Agent):
        content = message.get("content")
        if content is not None:
            if "context" in message:
-                content = oai.ChatCompletion.instantiate(
+                content = OpenAIWrapper.instantiate(
                    content,
                    message["context"],
                    self.llm_config and self.llm_config.get("allow_format_str_template", False),
                )
            print(content, flush=True)
        if "function_call" in message:
-            func_print = f"***** Suggested function Call: {message['function_call'].get('name', '(No function name found)')} *****"
+            function_call = dict(message["function_call"])
+            func_print = (
+                f"***** Suggested function Call: {function_call.get('name', '(No function name found)')} *****"
+            )
            print(colored(func_print, "green"), flush=True)
            print(
                "Arguments: \n",
-                message["function_call"].get("arguments", "(No arguments found)"),
+                function_call.get("arguments", "(No arguments found)"),
                flush=True,
                sep="",
            )
@@ -448,7 +456,7 @@ def receive(
                This field is only needed to distinguish between "function" or "assistant"/"user".
            4. "name": In most cases, this field is not needed. When the role is "function", this field is needed to indicate the function name.
            5. "context" (dict): the context of the message, which will be passed to
-                [Completion.create](../oai/Completion#create).
+                [OpenAIWrapper.create](../oai/client#create).
            sender: sender of an Agent instance.
            request_reply (bool or None): whether a reply is requested from the sender.
                If None, the value is determined by `self.reply_at_receive[sender]`.
@@ -484,7 +492,7 @@ async def a_receive(
                This field is only needed to distinguish between "function" or "assistant"/"user".
            4. "name": In most cases, this field is not needed. When the role is "function", this field is needed to indicate the function name.
            5. "context" (dict): the context of the message, which will be passed to
-                [Completion.create](../oai/Completion#create).
+                [OpenAIWrapper.create](../oai/client#create).
            sender: sender of an Agent instance.
            request_reply (bool or None): whether a reply is requested from the sender.
                If None, the value is determined by `self.reply_at_receive[sender]`.
@@ -597,17 +605,17 @@ def generate_oai_reply(
        config: Optional[Any] = None,
    ) -> Tuple[bool, Union[str, Dict, None]]:
        """Generate a reply using autogen.oai."""
-        llm_config = self.llm_config if config is None else config
-        if llm_config is False:
+        client = self.client if config is None else config
+        if client is None:
            return False, None
        if messages is None:
            messages = self._oai_messages[sender]
 
        # TODO: #1143 handle token limit exceeded error
-        response = oai.ChatCompletion.create(
-            context=messages[-1].pop("context", None), messages=self._oai_system_message + messages, **llm_config
+        response = client.create(
+            context=messages[-1].pop("context", None), messages=self._oai_system_message + messages
        )
-        return True, oai.ChatCompletion.extract_text_or_function_call(response)[0]
+        return True, client.extract_text_or_function_call(response)[0]
 
    def generate_code_execution_reply(
        self,
diff --git a/autogen/agentchat/groupchat.py b/autogen/agentchat/groupchat.py
index f1c549bc18b..174f62f4e15 100644
--- a/autogen/agentchat/groupchat.py
+++ b/autogen/agentchat/groupchat.py
@@ -10,7 +10,7 @@
 
 @dataclass
 class GroupChat:
-    """A group chat class that contains the following data fields:
+    """(In preview) A group chat class that contains the following data fields:
    - agents: a list of participating agents.
    - messages: a list of messages in the group chat.
    - max_round: the maximum number of rounds.
diff --git a/autogen/agentchat/user_proxy_agent.py b/autogen/agentchat/user_proxy_agent.py
index ae5f908d8fe..d72c2bdceba 100644
--- a/autogen/agentchat/user_proxy_agent.py
+++ b/autogen/agentchat/user_proxy_agent.py
@@ -63,7 +63,7 @@ def __init__(
                - last_n_messages (Experimental, Optional, int): The number of messages to look back for code execution. Default to 1.
            default_auto_reply (str or dict or None): the default auto reply message when no code execution or llm based reply is generated.
            llm_config (dict or False): llm inference configuration.
-                Please refer to [Completion.create](/docs/reference/oai/completion#create)
+                Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
                for available options.
                Default to false, which disables llm-based auto reply.
            system_message (str): system message for ChatCompletion inference.
diff --git a/autogen/code_utils.py b/autogen/code_utils.py
index caaf0907285..9965609f1b8 100644
--- a/autogen/code_utils.py
+++ b/autogen/code_utils.py
@@ -84,49 +84,8 @@ def extract_code(
    return extracted
 
 
-# _FIND_CODE_SYS_MSG = [
-#     {
-#         "role": "system",
-#         "content": """In the following conversation, an assistant suggests code and a user is expected to run it.
-# Read the conversation, and then find all the right code blocks for the user to run next in the right order.
-# Only return the code blocks that are expected to run.
-# Don't include code blocks which have been executed unless the user is requested to run the same block again.
-# When the user needs to run multiple blocks in sequence, make sure to output all the blocks to run in a right order.
-# If the line beginning with "# filename" is put before a code block, move it into the code block as the first line.
-# Make sure to add the right "python" or "sh" identifier if the language identifier is missing for a code block.
-# Don't make other changes to the code blocks.
-# Don't reply anything else if at least one code block is expected to run.
-# If no code block is expeted to run, check whether the task has been successfully finished at full satisfaction.
-# If not, reply with the reason why the task is not finished.""",
-#     },
-# ]
-# _FIND_CODE_CONFIG = {
-#     "model": FAST_MODEL,
-# }
-
-
-# def find_code(messages: List[Dict], sys_msg=None, **config) -> Tuple[List[Tuple[str, str]], str]:
-#     """Find code from a list of messages.
-
-#     Args:
-#         messages (str): The list of messages to find code from.
-#         sys_msg (Optional, str): The system message to prepend to the messages.
-#         config (Optional, dict): The configuration for the API call.
-
-#     Returns:
-#         list: A list of tuples, each containing the language and the code.
-#         str: The generated text by llm.
-#     """
-#     params = {**_FIND_CODE_CONFIG, **config}
-#     if sys_msg is None or not sys_msg[0]["content"]:
-#         sys_msg = _FIND_CODE_SYS_MSG
-#     response = oai.ChatCompletion.create(messages=sys_msg + messages, **params)
-#     content = oai.Completion.extract_text(response)[0]
-#     return extract_code(content), content
-
-
 def generate_code(pattern: str = CODE_BLOCK_PATTERN, **config) -> Tuple[str, float]:
-    """Generate code.
+    """(openai<1) Generate code.
 
    Args:
        pattern (Optional, str): The regular expression pattern for finding the code block.
@@ -151,7 +110,7 @@ def generate_code(pattern: str = CODE_BLOCK_PATTERN, **config) -> Tuple[str, flo
 
 
 def improve_function(file_name, func_name, objective, **config):
-    """(work in progress) Improve the function to achieve the objective."""
+    """(openai<1) Improve the function to achieve the objective."""
    params = {**_IMPROVE_FUNCTION_CONFIG, **config}
    # read the entire file into a str
    with open(file_name, "r") as f:
@@ -172,7 +131,7 @@ def improve_function(file_name, func_name, objective, **config):
 
 
 def improve_code(files, objective, suggest_only=True, **config):
-    """Improve the code to achieve a given objective.
+    """(openai<1) Improve the code to achieve a given objective.
 
    Args:
        files (list): A list of file names containing the source code.
@@ -422,7 +381,7 @@ def execute_code(
 
 
 def generate_assertions(definition: str, **config) -> Tuple[str, float]:
-    """Generate assertions for a function.
+    """(openai<1) Generate assertions for a function.
 
    Args:
        definition (str): The function definition, including the signature and docstr.
@@ -459,7 +418,7 @@ def eval_function_completions(
    timeout: Optional[float] = 3,
    use_docker: Optional[bool] = True,
 ) -> Dict:
-    """Select a response from a list of responses for the function completion task (using generated assertions), and/or evaluate if the task is successful using a gold test.
+    """(openai<1) Select a response from a list of responses for the function completion task (using generated assertions), and/or evaluate if the task is successful using a gold test.
 
    Args:
        responses (list): The list of responses.
@@ -549,7 +508,7 @@ def __init__(self, assertions):
        self.metrics = self.responses = None
 
    def pass_assertions(self, context, response, **_):
-        """Check if the response passes the assertions."""
+        """(openai<1) Check if the response passes the assertions."""
        responses = oai.Completion.extract_text(response)
        metrics = eval_function_completions(responses, context["definition"], assertions=self._assertions)
        self._assertions = metrics["assertions"]
@@ -564,7 +523,7 @@ def implement(
    configs: Optional[List[Dict]] = None,
    assertions: Optional[Union[str, Callable[[str], Tuple[str, float]]]] = generate_assertions,
 ) -> Tuple[str, float]:
-    """Implement a function from a definition.
+    """(openai<1) Implement a function from a definition.
 
    Args:
        definition (str): The function definition, including the signature and docstr.
diff --git a/autogen/math_utils.py b/autogen/math_utils.py
index 7f35470fa6b..00fcae57ad2 100644
--- a/autogen/math_utils.py
+++ b/autogen/math_utils.py
@@ -9,7 +9,7 @@
 
 
 def solve_problem(problem: str, **config) -> str:
-    """(Experimental) Solve the math problem.
+    """(openai<1) Solve the math problem.
 
    Args:
        problem (str): The problem statement.
diff --git a/autogen/oai/__init__.py b/autogen/oai/__init__.py
index a1b34b33683..dbcd2f79607 100644
--- a/autogen/oai/__init__.py
+++ b/autogen/oai/__init__.py
@@ -1,3 +1,4 @@
+from autogen.oai.client import OpenAIWrapper
 from autogen.oai.completion import Completion, ChatCompletion
 from autogen.oai.openai_utils import (
    get_config_list,
@@ -9,6 +10,7 @@
 )
 
 __all__ = [
+    "OpenAIWrapper",
    "Completion",
    "ChatCompletion",
    "get_config_list",
diff --git a/autogen/oai/client.py b/autogen/oai/client.py
new file mode 100644
index 00000000000..96dc02d38de
--- /dev/null
+++ b/autogen/oai/client.py
@@ -0,0 +1,270 @@
+from __future__ import annotations
+
+import os
+import sys
+from typing import List, Optional, Dict, Callable
+import logging
+import inspect
+from flaml.automl.logger import logger_formatter
+
+from autogen.oai.openai_utils import get_key
+
+try:
+    from openai import OpenAI, APIError
+    from openai.types.chat import ChatCompletion
+    from openai.types.completion import Completion
+    import diskcache
+
+    ERROR = None
+except ImportError:
+    ERROR = ImportError("Please install openai>=1 and diskcache to use autogen.OpenAIWrapper.")
+    OpenAI = object
+logger = logging.getLogger(__name__)
+if not logger.handlers:
+    # Add the console handler.
+    _ch = logging.StreamHandler(stream=sys.stdout)
+    _ch.setFormatter(logger_formatter)
+    logger.addHandler(_ch)
+
+
+class OpenAIWrapper:
+    """A wrapper class for openai client."""
+
+    cache_path_root: str = ".cache"
+    extra_kwargs = {"seed", "filter_func", "allow_format_str_template", "context", "api_version"}
+    openai_kwargs = set(inspect.getfullargspec(OpenAI.__init__).kwonlyargs)
+
+    def __init__(self, *, config_list: List[Dict] = None, **base_config):
+        """
+        Args:
+            config_list: a list of config dicts to override the base_config.
+                They can contain additional kwargs as allowed in the [create](/docs/reference/oai/client#create) method. E.g.,
+
+                ```python
+                config_list=[
+                    {
+                        "model": "gpt-4",
+                        "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),
+                        "api_type": "azure",
+                        "base_url": os.environ.get("AZURE_OPENAI_API_BASE"),
+                        "api_version": "2023-03-15-preview",
+                    },
+                    {
+                        "model": "gpt-3.5-turbo",
+                        "api_key": os.environ.get("OPENAI_API_KEY"),
+                        "api_type": "open_ai",
+                        "base_url": "https://api.openai.com/v1",
+                    },
+                    {
+                        "model": "llama-7B",
+                        "base_url": "http://127.0.0.1:8080",
+                        "api_type": "open_ai",
+                    }
+                ]
+                ```
+
+            base_config: base config. It can contain both keyword arguments for openai client
+                and additional kwargs.
+        """
+        openai_config, extra_kwargs = self._separate_openai_config(base_config)
+        if type(config_list) is list and len(config_list) == 0:
+            logger.warning("openai client was provided with an empty config_list, which may not be intended.")
+        if config_list:
+            self._clients = [self._client(config, openai_config) for config in config_list]
+            self._config_list = [
+                {**extra_kwargs, **{k: v for k, v in config.items() if k not in self.openai_kwargs}}
+                for config in config_list
+            ]
+        else:
+            self._clients = [self._client(extra_kwargs, openai_config)]
+            self._config_list = [extra_kwargs]
+
+    def _process_for_azure(self, config: Dict, extra_kwargs: Dict, segment: str = "default"):
+        # deal with api_version
+        query_segment = f"{segment}_query"
+        headers_segment = f"{segment}_headers"
+        api_version = extra_kwargs.get("api_version")
+        if api_version is not None and query_segment not in config:
+            config[query_segment] = {"api-version": api_version}
+            if segment == "default":
+                # remove the api_version from extra_kwargs
+                extra_kwargs.pop("api_version")
+        if segment == "extra":
+            return
+        # deal with api_type
+        api_type = extra_kwargs.get("api_type")
+        if api_type is not None and api_type.startswith("azure") and headers_segment not in config:
+            api_key = config.get("api_key", os.environ.get("AZURE_OPENAI_API_KEY"))
+            config[headers_segment] = {"api-key": api_key}
+            # remove the api_type from extra_kwargs
+            extra_kwargs.pop("api_type")
+            # deal with model
+            model = extra_kwargs.get("model")
+            if model is None:
+                return
+            if "gpt-3.5" in model:
+                # hack for azure gpt-3.5
+                extra_kwargs["model"] = model = model.replace("gpt-3.5", "gpt-35")
+            base_url = config.get("base_url")
+            if base_url is None:
+                raise ValueError("to use azure openai api, base_url must be specified.")
+            suffix = f"openai/deployments/{model}"
+            if not base_url.endswith(suffix):
+                config["base_url"] += suffix
+
+    def _separate_openai_config(self, config):
+        """Separate the config into openai_config and extra_kwargs."""
+        openai_config = {k: v for k, v in config.items() if k in self.openai_kwargs}
+        extra_kwargs = {k: v for k, v in config.items() if k not in self.openai_kwargs}
+        self._process_for_azure(openai_config, extra_kwargs)
+        return openai_config, extra_kwargs
+
+    def _separate_create_config(self, config):
+        """Separate the config into create_config and extra_kwargs."""
+        create_config = {k: v for k, v in config.items() if k not in self.extra_kwargs}
+        extra_kwargs = {k: v for k, v in config.items() if k in self.extra_kwargs}
+        return create_config, extra_kwargs
+
+    def _client(self, config, openai_config):
+        """Create a client with the given config to override openai_config,
+        after removing extra kwargs.
+        """
+        openai_config = {**openai_config, **{k: v for k, v in config.items() if k in self.openai_kwargs}}
+        self._process_for_azure(openai_config, config)
+        client = OpenAI(**openai_config)
+        return client
+
+    @classmethod
+    def instantiate(
+        cls,
+        template: str | Callable | None,
+        context: Optional[Dict] = None,
+        allow_format_str_template: Optional[bool] = False,
+    ):
+        if not context or template is None:
+            return template
+        if isinstance(template, str):
+            return template.format(**context) if allow_format_str_template else template
+        return template(context)
+
+    def _construct_create_params(self, create_config: Dict, extra_kwargs: Dict) -> Dict:
+        """Prime the create_config with extra_kwargs."""
+        # Validate the config
+        prompt = create_config.get("prompt")
+        messages = create_config.get("messages")
+        if (prompt is None) == (messages is None):
+            raise ValueError("Either prompt or messages should be in create config but not both.")
+        context = extra_kwargs.get("context")
+        if context is None:
+            # No need to instantiate if no context is provided.
+            return create_config
+        # Instantiate the prompt or messages
+        allow_format_str_template = extra_kwargs.get("allow_format_str_template", False)
+        # Make a copy of the config
+        params = create_config.copy()
+        if prompt is not None:
+            # Instantiate the prompt
+            params["prompt"] = self.instantiate(prompt, context, allow_format_str_template)
+        elif context:
+            # Instantiate the messages
+            params["messages"] = [
+                {
+                    **m,
+                    "content": self.instantiate(m["content"], context, allow_format_str_template),
+                }
+                if m.get("content")
+                else m
+                for m in messages
+            ]
+        return params
+
+    def create(self, **config):
+        """Make a completion for a given config using openai's clients.
+        Besides the kwargs allowed in openai's client, we allow the following additional kwargs.
+        The config in each client will be overridden by the config.
+
+        Args:
+            - context (Dict | None): The context to instantiate the prompt or messages. Default to None.
+                It needs to contain keys that are used by the prompt template or the filter function.
+                E.g., `prompt="Complete the following sentence: {prefix}", context={"prefix": "Today I feel"}`.
+                The actual prompt will be:
+                "Complete the following sentence: Today I feel".
+                More examples can be found at [templating](/docs/Use-Cases/enhanced_inference#templating).
+            - `seed` (int | None) for the cache. Default to 41.
+                An integer seed is useful when implementing "controlled randomness" for the completion.
+                None for no caching.
+            - filter_func (Callable | None): A function that takes in the context and the response
+                and returns a boolean to indicate whether the response is valid. E.g.,
+
+                ```python
+                def yes_or_no_filter(context, response):
+                    return context.get("yes_or_no_choice", False) is False or any(
+                        text in ["Yes.", "No."] for text in client.extract_text_or_function_call(response)
+                    )
+                ```
+
+            - allow_format_str_template (bool | None): Whether to allow format string template in the config. Default to false.
+            - api_version (str | None): The api version. Default to None. E.g., "2023-08-01-preview".
+        """
+        if ERROR:
+            raise ERROR
+        last = len(self._clients) - 1
+        for i, client in enumerate(self._clients):
+            # merge the input config with the i-th config in the config list
+            full_config = {**config, **self._config_list[i]}
+            # separate the config into create_config and extra_kwargs
+            create_config, extra_kwargs = self._separate_create_config(full_config)
+            # process for azure
+            self._process_for_azure(create_config, extra_kwargs, "extra")
+            # construct the create params
+            params = self._construct_create_params(create_config, extra_kwargs)
+            # get the seed, filter_func and context
+            seed = extra_kwargs.get("seed", 41)
+            filter_func = extra_kwargs.get("filter_func")
+            context = extra_kwargs.get("context")
+            with diskcache.Cache(f"{self.cache_path_root}/{seed}") as cache:
+                if seed is not None:
+                    # Try to get the response from cache
+                    key = get_key(params)
+                    response = cache.get(key, None)
+                    if response is not None:
+                        # check the filter
+                        pass_filter = filter_func is None or filter_func(context=context, response=response)
+                        if pass_filter or i == last:
+                            # Return the response if it passes the filter or it is the last client
+                            response.config_id = i
+                            response.pass_filter = pass_filter
+                            # TODO: add response.cost
+                            return response
+                completions = client.chat.completions if "messages" in params else client.completions
+                try:
+                    response = completions.create(**params)
+                except APIError:
+                    logger.debug(f"config {i} failed", exc_info=1)
+                    if i == last:
+                        raise
+                else:
+                    if seed is not None:
+                        # Cache the response
+                        cache.set(key, response)
+                    return response
+
+    @classmethod
+    def extract_text_or_function_call(cls, response: ChatCompletion | Completion) -> List[str]:
+        """Extract the text or function calls from a completion or chat response.
+
+        Args:
+            response (ChatCompletion | Completion): The response from openai.
+
+        Returns:
+            A list of text or function calls in the responses.
+        """
+        choices = response.choices
+        if isinstance(response, Completion):
+            return [choice.text for choice in choices]
+        return [
+            choice.message if choice.message.function_call is not None else choice.message.content
+            for choice in choices
+        ]
+
+
+# TODO: logging
diff --git a/autogen/oai/completion.py b/autogen/oai/completion.py
index a720ccc2446..88d53bca4c0 100644
--- a/autogen/oai/completion.py
+++ b/autogen/oai/completion.py
@@ -13,11 +13,10 @@
 
 try:
    import openai
-    from openai.error import (
-        ServiceUnavailableError,
+    from openai import (
        RateLimitError,
        APIError,
-        InvalidRequestError,
+        BadRequestError,
        APIConnectionError,
        Timeout,
        AuthenticationError,
@@ -27,7 +26,10 @@
 
    ERROR = None
 except ImportError:
-    ERROR = ImportError("please install openai and diskcache to use the autogen.oai subpackage.")
+    ERROR = ImportError(
+        "(Deprecated) The autogen.Completion class requires openai<1 and diskcache. "
+        "Please switch to autogen.OpenAIWrapper for openai>=1."
+    )
    openai_Completion = object
 logger = logging.getLogger(__name__)
 if not logger.handlers:
@@ -38,7 +40,7 @@
 
 
 class Completion(openai_Completion):
-    """A class for OpenAI completion API.
+    """(openai<1) A class for OpenAI completion API.
 
    It also supports: ChatCompletion, Azure OpenAI API.
    """
@@ -198,7 +200,6 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca
        Try cache first. If not found, call the openai api. If the api call fails, retry after retry_wait_time.
        """
        config = config.copy()
-        openai.api_key_path = config.pop("api_key_path", openai.api_key_path)
        key = get_key(config)
        if use_cache:
            response = cls._cache.get(key, None)
@@ -222,10 +223,7 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca
                response = openai_completion.create(**config)
            else:
                response = openai_completion.create(request_timeout=request_timeout, **config)
-        except (
-            ServiceUnavailableError,
-            APIConnectionError,
-        ):
+        except APIConnectionError:
            # transient error
            logger.info(f"retrying in {retry_wait_time} seconds...", exc_info=1)
            sleep(retry_wait_time)
@@ -261,7 +259,7 @@ def _get_response(cls, config: Dict, raise_on_ratelimit_or_timeout=False, use_ca
                    f"Failed to get response from openai api due to getting RateLimitError or Timeout for {max_retry_period} seconds."
                )
                return response
-        except InvalidRequestError:
+        except BadRequestError:
            if "azure" in config.get("api_type", openai.api_type) and "model" in config:
                # azure api uses "engine" instead of "model"
                config["engine"] = config.pop("model").replace("gpt-3.5-turbo", "gpt-35-turbo")
@@ -572,6 +570,10 @@ def eval_func(responses, **data):
            dict: The optimized hyperparameter setting.
            tune.ExperimentAnalysis: The tuning results.
        """
+        logger.warning(
+            "tuning via Completion.tune is deprecated in pyautogen v0.2 and openai>=1. "
+            "flaml.tune supports tuning more generically."
+        )
        if ERROR:
            raise ERROR
        space = cls.default_search_space.copy()
@@ -708,7 +710,7 @@ def create(
        context: Optional[Dict] = None,
        use_cache: Optional[bool] = True,
        config_list: Optional[List[Dict]] = None,
-        filter_func: Optional[Callable[[Dict, Dict, Dict], bool]] = None,
+        filter_func: Optional[Callable[[Dict, Dict], bool]] = None,
        raise_on_ratelimit_or_timeout: Optional[bool] = True,
        allow_format_str_template: Optional[bool] = False,
        **config,
@@ -735,18 +737,18 @@ def create(
                    "model": "gpt-4",
                    "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),
                    "api_type": "azure",
-                    "api_base": os.environ.get("AZURE_OPENAI_API_BASE"),
+                    "base_url": os.environ.get("AZURE_OPENAI_API_BASE"),
                    "api_version": "2023-03-15-preview",
                },
                {
                    "model": "gpt-3.5-turbo",
                    "api_key": os.environ.get("OPENAI_API_KEY"),
                    "api_type": "open_ai",
-                    "api_base": "https://api.openai.com/v1",
+                    "base_url": "https://api.openai.com/v1",
                },
                {
                    "model": "llama-7B",
-                    "api_base": "http://127.0.0.1:8080",
+                    "base_url": "http://127.0.0.1:8080",
                    "api_type": "open_ai",
                }
            ],
@@ -754,7 +756,7 @@ def create(
            prompt="Hi",
        )
        ```
 
-            filter_func (Callable, Optional): A function that takes in the context, the config and the response and returns a boolean to indicate whether the response is valid. E.g.,
+            filter_func (Callable, Optional): A function that takes in the context and the response and returns a boolean to indicate whether the response is valid. E.g.,
 
        ```python
        def yes_or_no_filter(context, config, response):
@@ -780,6 +782,11 @@ def yes_or_no_filter(context, config, response):
            - `config_id`: the index of the config in the config_list that is used to generate the response.
            - `pass_filter`: whether the response passes the filter function. None if no filter is provided.
        """
+        logger.warning(
+            "Completion.create is deprecated in pyautogen v0.2 and openai>=1. "
+            "The new openai requires initiating a client for inference. "
+            "Please refer to https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification"
+        )
        if ERROR:
            raise ERROR
 
@@ -808,16 +815,14 @@ def yes_or_no_filter(context, config, response):
                )
                if response == -1:
                    return response
-                pass_filter = filter_func is None or filter_func(
-                    context=context, base_config=config, response=response
-                )
+                pass_filter = filter_func is None or filter_func(context=context, response=response)
                if pass_filter or i == last:
                    response["cost"] = cost + response["cost"]
                    response["config_id"] = i
                    response["pass_filter"] = pass_filter
                    return response
                cost += response["cost"]
-            except (AuthenticationError, RateLimitError, Timeout, InvalidRequestError):
+            except (AuthenticationError, RateLimitError, Timeout, BadRequestError):
                logger.debug(f"failed with config {i}", exc_info=1)
                if i == last:
                    raise
@@ -1166,6 +1171,12 @@ def start_logging(
                while the compact history dict has a linear size.
            reset_counter (bool): whether to reset the counter of the number of API calls.
        """
+        logger.warning(
+            "logging via Completion.start_logging is deprecated in pyautogen v0.2. "
+            "logging via OpenAIWrapper will be added back in a future release."
+        )
+        if ERROR:
+            raise ERROR
        cls._history_dict = {} if history_dict is None else history_dict
        cls._history_compact = compact
        cls._count_create = 0 if reset_counter or cls._count_create is None else cls._count_create
@@ -1177,7 +1188,7 @@ def stop_logging(cls):
 
 
 class ChatCompletion(Completion):
-    """A class for OpenAI API ChatCompletion. Share the same API as Completion."""
+    """(openai<1) A class for OpenAI API ChatCompletion. Share the same API as Completion."""
 
    default_search_space = Completion.default_search_space.copy()
    default_search_space["model"] = tune.choice(["gpt-3.5-turbo", "gpt-4"])
diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py
index cbae458c59c..61dd330169c 100644
--- a/autogen/oai/openai_utils.py
+++ b/autogen/oai/openai_utils.py
@@ -7,7 +7,7 @@
 
 from dotenv import find_dotenv, load_dotenv
 
-NON_CACHE_KEY = ["api_key", "api_base", "api_type", "api_version"]
+NON_CACHE_KEY = ["api_key", "base_url", "api_type", "api_version"]
 
 
 def get_key(config):
@@ -33,13 +33,13 @@ def get_key(config):
 
 
 def get_config_list(
-    api_keys: List, api_bases: Optional[List] = None, api_type: Optional[str] = None, api_version: Optional[str] = None
+    api_keys: List, base_urls: Optional[List] = None, api_type: Optional[str] = None, api_version: Optional[str] = None
 ) -> List[Dict]:
    """Get a list of configs for openai api calls.
 
    Args:
        api_keys (list): The api keys for openai api calls.
-        api_bases (list, optional): The api bases for openai api calls.
+        base_urls (list, optional): The base URLs for openai api calls.
        api_type (str, optional): The api type for openai api calls.
        api_version (str, optional): The api version for openai api calls.
    """
@@ -48,8 +48,8 @@ def get_config_list(
        if not api_key.strip():
            continue
        config = {"api_key": api_key}
-        if api_bases:
-            config["api_base"] = api_bases[i]
+        if base_urls:
+            config["base_url"] = base_urls[i]
        if api_type:
            config["api_type"] = api_type
        if api_version:
@@ -109,7 +109,7 @@ def config_list_openai_aoai(
            # Assuming Azure OpenAI api keys in os.environ["AZURE_OPENAI_API_KEY"], in separated lines
            api_keys=os.environ.get("AZURE_OPENAI_API_KEY", "").split("\n"),
            # Assuming Azure OpenAI api bases in os.environ["AZURE_OPENAI_API_BASE"], in separated lines
-            api_bases=os.environ.get("AZURE_OPENAI_API_BASE", "").split("\n"),
+            base_urls=os.environ.get("AZURE_OPENAI_API_BASE", "").split("\n"),
            api_type="azure",
            api_version="2023-07-01-preview",  # change if necessary
        )
@@ -121,7 +121,7 @@ def config_list_openai_aoai(
            # Assuming OpenAI API_KEY in os.environ["OPENAI_API_KEY"]
            api_keys=os.environ.get("OPENAI_API_KEY", "").split("\n"),
            # "api_type": "open_ai",
-            # "api_base": "https://api.openai.com/v1",
+            # "base_url": "https://api.openai.com/v1",
        )
        if exclude != "openai"
        else []
@@ -248,7 +248,7 @@ def config_list_from_json(
 
 
 def get_config(
-    api_key: str, api_base: Optional[str] = None, api_type: Optional[str] = None, api_version: Optional[str] = None
+    api_key: str, base_url: Optional[str] = None, api_type: Optional[str] = None, api_version: Optional[str] = None
 ) -> Dict:
    """
    Construct a configuration dictionary with the provided API configurations.
@@ -261,12 +261,12 @@ def get_config(
            "api_key_env_var": "ANOTHER_API_KEY",
            "api_type": "aoai",
            "api_version": "v2",
-            "api_base": "https://api.someotherapi.com"
+            "base_url": "https://api.someotherapi.com"
        }
    }
 
    Args:
        api_key (str): The API key used for authenticating API requests.
-        api_base (str, optional): The base URL of the API. Defaults to None.
+        base_url (str, optional): The base URL of the API. Defaults to None.
        api_type (str, optional): The type or kind of API. Defaults to None.
        api_version (str, optional): The API version. Defaults to None.
 
@@ -274,8 +274,8 @@ def get_config(
        Dict: A dictionary containing the API configurations.
    """
    config = {"api_key": api_key}
-    if api_base:
-        config["api_base"] = api_base
+    if base_url:
+        config["base_url"] = base_url
    if api_type:
        config["api_type"] = api_type
    if api_version:
@@ -302,7 +302,7 @@ def config_list_from_dotenv(
            If a string is provided as configuration, it is considered as an environment variable name storing the API key.
            If a dict is provided, it should contain at least 'api_key_env_var' key,
-            and optionally other API configurations like 'api_base', 'api_type', and 'api_version'.
+            and optionally other API configurations like 'base_url', 'api_type', and 'api_version'.
            Defaults to a basic map with 'gpt-4' and 'gpt-3.5-turbo' mapped to 'OPENAI_API_KEY'.
        filter_dict (dict, optional): A dictionary containing the models to be loaded.
            Containing a 'model' key mapped to a set of model names to be loaded.
diff --git a/autogen/token_count_utils.py b/autogen/token_count_utils.py index fd9d61a10a1..9e254932faf 100644 --- a/autogen/token_count_utils.py +++ b/autogen/token_count_utils.py @@ -1,7 +1,7 @@ -import tiktoken -from typing import List, Union, Dict, Tuple +from typing import List, Union, Dict import logging import json +import tiktoken logger = logging.getLogger(__name__) diff --git a/autogen/version.py b/autogen/version.py index fb69db9cf4d..a7621268bde 100644 --- a/autogen/version.py +++ b/autogen/version.py @@ -1 +1 @@ -__version__ = "0.1.14" +__version__ = "0.2.0b1" diff --git a/notebook/agentchat_MathChat.ipynb b/notebook/agentchat_MathChat.ipynb index 9dd90fb0635..4159784bccf 100644 --- a/notebook/agentchat_MathChat.ipynb +++ b/notebook/agentchat_MathChat.ipynb @@ -91,14 +91,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", @@ -128,14 +128,12 @@ "source": [ "from autogen.agentchat.contrib.math_user_proxy_agent import MathUserProxyAgent\n", "\n", - "autogen.ChatCompletion.start_logging()\n", - "\n", "# 1. create an AssistantAgent instance named \"assistant\"\n", "assistant = autogen.AssistantAgent(\n", " name=\"assistant\", \n", " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " }\n", diff --git a/notebook/agentchat_RetrieveChat.ipynb b/notebook/agentchat_RetrieveChat.ipynb index 36605e52160..e0aa629cb75 100644 --- a/notebook/agentchat_RetrieveChat.ipynb +++ b/notebook/agentchat_RetrieveChat.ipynb @@ -117,14 +117,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", @@ -179,14 +179,12 @@ "from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent\n", "import chromadb\n", "\n", - "autogen.ChatCompletion.start_logging()\n", - "\n", "# 1. 
create an RetrieveAssistantAgent instance named \"assistant\"\n", "assistant = RetrieveAssistantAgent(\n", " name=\"assistant\", \n", " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " },\n", diff --git a/notebook/agentchat_auto_feedback_from_code_execution.ipynb b/notebook/agentchat_auto_feedback_from_code_execution.ipynb index 564c7e2c8db..f3c027855bb 100644 --- a/notebook/agentchat_auto_feedback_from_code_execution.ipynb +++ b/notebook/agentchat_auto_feedback_from_code_execution.ipynb @@ -91,14 +91,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/agentchat_chess.ipynb b/notebook/agentchat_chess.ipynb index a06265d21bf..68ab8063fe5 100644 --- a/notebook/agentchat_chess.ipynb +++ b/notebook/agentchat_chess.ipynb @@ -105,14 +105,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", @@ -1010,7 +1010,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.17" + "version": "3.11.4" }, "orig_nbformat": 4 }, diff --git a/notebook/agentchat_function_call.ipynb b/notebook/agentchat_function_call.ipynb index 3d73066f69a..836a3b0af9c 100644 --- a/notebook/agentchat_function_call.ipynb +++ b/notebook/agentchat_function_call.ipynb @@ -253,7 +253,7 @@ " },\n", " ],\n", " \"config_list\": config_list,\n", - " \"request_timeout\": 120,\n", + " \"timeout\": 120,\n", "}\n", "chatbot = autogen.AssistantAgent(\n", " name=\"chatbot\",\n", diff --git a/notebook/agentchat_groupchat.ipynb b/notebook/agentchat_groupchat.ipynb index ac3a74fca59..22e9067f384 100644 --- a/notebook/agentchat_groupchat.ipynb +++ b/notebook/agentchat_groupchat.ipynb @@ -93,14 +93,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/agentchat_groupchat_RAG.ipynb b/notebook/agentchat_groupchat_RAG.ipynb index fd12cbe8c9b..8d3848f01e9 100644 --- a/notebook/agentchat_groupchat_RAG.ipynb +++ b/notebook/agentchat_groupchat_RAG.ipynb @@ -88,9 +88,8 @@ " \"api_key\": \"\",\n", " }, # OpenAI API endpoint for gpt-4\n", " {\n", - " \"engine\": \"gpt-35-turbo-0631\", \n", " \"model\": \"gpt-35-turbo-0631\", # 0631 or newer is needed to use functions\n", - " \"api_base\": \"\", \n", + " \"base_url\": \"\", \n", " \"api_type\": \"azure\", \n", " \"api_version\": \"2023-07-01-preview\", # 2023-07-01-preview or newer is needed to use functions\n", " \"api_key\": \"\"\n", @@ -122,13 +121,13 @@ "import chromadb\n", "\n", "llm_config = {\n", - " \"request_timeout\": 60,\n", + " \"timeout\": 
60,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " \"temperature\": 0,\n", "}\n", "\n", - "autogen.ChatCompletion.start_logging()\n", + "# autogen.ChatCompletion.start_logging()\n", "termination_msg = lambda x: isinstance(x, dict) and \"TERMINATE\" == str(x.get(\"content\", \"\"))[-9:].upper()\n", "\n", "boss = autogen.UserProxyAgent(\n", @@ -251,7 +250,7 @@ " },\n", " ],\n", " \"config_list\": config_list,\n", - " \"request_timeout\": 60,\n", + " \"timeout\": 60,\n", " \"seed\": 42,\n", " }\n", "\n", diff --git a/notebook/agentchat_groupchat_research.ipynb b/notebook/agentchat_groupchat_research.ipynb index 9d9c2c3dc3f..79cda682a32 100644 --- a/notebook/agentchat_groupchat_research.ipynb +++ b/notebook/agentchat_groupchat_research.ipynb @@ -79,14 +79,14 @@ " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k-0314',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", @@ -116,7 +116,7 @@ " \"seed\": 42, # change the seed for different trials\n", " \"temperature\": 0,\n", " \"config_list\": config_list_gpt4,\n", - " \"request_timeout\": 120,\n", + " \"timeout\": 120,\n", "}\n", "user_proxy = autogen.UserProxyAgent(\n", " name=\"Admin\",\n", diff --git a/notebook/agentchat_groupchat_vis.ipynb b/notebook/agentchat_groupchat_vis.ipynb index c780a588a6b..b4f492aa174 100644 --- a/notebook/agentchat_groupchat_vis.ipynb +++ b/notebook/agentchat_groupchat_vis.ipynb @@ -91,14 +91,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/agentchat_human_feedback.ipynb b/notebook/agentchat_human_feedback.ipynb index 0119fb48f47..0d7fa3c9778 100644 --- a/notebook/agentchat_human_feedback.ipynb +++ b/notebook/agentchat_human_feedback.ipynb @@ -86,14 +86,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-4\n", " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # another Azure OpenAI API endpoint for gpt-4\n", @@ -104,14 +104,14 @@ " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-3.5-turbo\n", " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # another Azure OpenAI API endpoint for gpt-3.5-turbo\n", @@ -152,10 +152,7 @@ " name=\"user_proxy\",\n", " human_input_mode=\"ALWAYS\",\n", " is_termination_msg=lambda x: x.get(\"content\", \"\").rstrip().endswith(\"TERMINATE\"),\n", - ")\n", - "\n", - "# the purpose of the following line is to log the conversation history\n", - "autogen.ChatCompletion.start_logging()\n" + ")\n" ] }, 
{ @@ -372,24 +369,16 @@ "\n", "The human user can provide feedback at each step. When the human user didn't provide feedback, the code was executed. The executed results and error messages are returned to the assistant, and the assistant is able to modify the code based on the feedback. In the end, the task is complete and a \"TERMINATE\" signal is sent from the assistant. The user skipped feedback in the end and the conversation is finished.\n", "\n", - "After the conversation is finished, we can save the conversation log between the two agents. The log can be accessed from `autogen.ChatCompletion.logged_history`." + "After the conversation is finished, we can save the conversations between the two agents. The conversation can be accessed from `user_proxy.chat_messages`." ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "{'[{\"content\": \"You are a helpful AI assistant.\\\\nSolve tasks using your coding and language skills.\\\\nIn the following cases, suggest python code (in a python coding block) or shell script (in a sh coding block) for the user to execute.\\\\n 1. When you need to collect info, use the code to output the info you need, for example, browse or search the web, download/read a file, print the content of a webpage or a file, get the current date/time. After sufficient info is printed and the task is ready to be solved based on your language skill, you can solve the task by yourself.\\\\n 2. When you need to perform some task with code, use the code to perform the task and output the result. Finish the task smartly.\\\\nSolve the task step by step if you need to. If a plan is not provided, explain your plan first. Be clear which step uses code, and which step uses your language skill.\\\\nWhen using code, you must indicate the script type in the code block. The user cannot provide any other feedback or perform any other action beyond executing the code you suggest. The user can\\'t modify your code. So do not suggest incomplete code which requires users to modify. Don\\'t use a code block if it\\'s not intended to be executed by the user.\\\\nIf you want the user to save the code in a file before executing it, put # filename: inside the code block as the first line. Don\\'t include multiple code blocks in one response. Do not ask users to copy and paste the result. Instead, use \\'print\\' function for the output when relevant. Check the execution result returned by the user.\\\\nIf the result indicates there is an error, fix the error and output the code again. Suggest the full code instead of partial code or code changes. If the error can\\'t be fixed or if the task is not solved even after the code is executed successfully, analyze the problem, revisit your assumption, collect additional info you need, and think of a different approach to try.\\\\nWhen you find an answer, verify the answer carefully. 
Include verifiable evidence in your response if possible.\\\\nReply \\\\\"TERMINATE\\\\\" in the end when everything is done.\\\\n \", \"role\": \"system\"}, {\"content\": \"\\\\nFind $a + b + c$, given that $x+y \\\\\\\\neq -1$ and \\\\n\\\\\\\\begin{align}\\\\n\\\\tax + by + c & = x + 7,\\\\\\\\\\\\n\\\\ta + bx + cy & = 2x + 6y,\\\\\\\\\\\\n\\\\tay + b + cx & = 4x + y.\\\\n\\\\\\\\end{align}.\\\\n\", \"role\": \"user\"}, {\"content\": \"To solve the system of linear equations, I will use NumPy library in Python to calculate the coefficients and constants and then use the linalg.solve() function to find the values of a, b, and c. Finally, I will add them up to find $a + b + c$.\\\\n\\\\nStep 1: Define the coefficients and constants\\\\n\\\\nStep 2: Solve the system of equations using NumPy\\\\n\\\\nStep 3: Calculate the sum of a, b, and c\", \"role\": \"assistant\"}, {\"content\": \"use sympy\", \"role\": \"user\"}, {\"content\": \"Sure, I will use Sympy to solve the system of linear equations.\\\\n\\\\nStep 1: Import Sympy library and define the symbols\\\\n``` python\\\\nfrom sympy import symbols, Eq, solve\\\\n\\\\na, b, c, x, y = symbols(\\'a b c x y\\')\\\\n```\\\\n\\\\nStep 2: Define the equations\\\\n``` python\\\\neq1 = Eq(a * x + b * y + c, x + 7)\\\\neq2 = Eq(a + b * x + c * y, 2 * x + 6 * y)\\\\neq3 = Eq(a * y + b + c * x, 4 * x + y)\\\\n```\\\\n\\\\nStep 3: Solve the system of equations\\\\n``` python\\\\nsolutions = solve((eq1, eq2, eq3), (a, b, c))\\\\n```\\\\n\\\\nStep 4: Calculate the sum of a, b, and c\\\\n``` python\\\\na_value, b_value, c_value = solutions[a], solutions[b], solutions[c]\\\\nsum_abc = a_value + b_value + c_value\\\\n```\\\\n\\\\n\", \"role\": \"assistant\"}, {\"content\": \"exitcode: 1 (execution failed)\\\\nCode output: \\\\n File \\\\\"\\\\\", line 2\\\\n Step 2: Define the equations\\\\n ^\\\\nSyntaxError: invalid syntax\\\\n\", \"role\": \"user\"}, {\"content\": \"Apologies for the confusion. I should have provided the code in a single block without the step-by-step comments. Here is the code to solve the system of linear equations using Sympy:\\\\n\\\\n```python\\\\nfrom sympy import symbols, Eq, solve\\\\n\\\\na, b, c, x, y = symbols(\\'a b c x y\\')\\\\n\\\\neq1 = Eq(a * x + b * y + c, x + 7)\\\\neq2 = Eq(a + b * x + c * y, 2 * x + 6 * y)\\\\neq3 = Eq(a * y + b + c * x, 4 * x + y)\\\\n\\\\nsolutions = solve((eq1, eq2, eq3), (a, b, c))\\\\n\\\\na_value, b_value, c_value = solutions[a], solutions[b], solutions[c]\\\\nsum_abc = a_value + b_value + c_value\\\\nprint(sum_abc)\\\\n```\\\\n\\\\nPlease run the provided code to find the sum of a, b, and c.\", \"role\": \"assistant\"}, {\"content\": \"exitcode: 0 (execution succeeded)\\\\nCode output: \\\\n(2*x**2 - 3*x + y)/(x**2 - x*y - x + y**2 - y + 1) + (x**2 - 3*x*y + 2*x + y**2 - y)/(x**2 - x*y - x + y**2 - y + 1) + (4*x**2 - 4*x*y - 6*x + 6*y**2 - 7*y + 7)/(x**2 - x*y - x + y**2 - y + 1)\\\\n\", \"role\": \"user\"}, {\"content\": \"I apologize for the confusion. The code provided the sum of a, b, and c in a symbolic representation. To get an answer, we need to put some constraints on x and y for which x + y is not equal to -1.\\\\n\\\\nWe can choose any values for x and y that don\\'t satisfy x + y = -1, and then substitute them into the equation to find the corresponding sum of a, b, and c. 
Let\\'s use x = 1 and y = 0, which satisfy the given condition.\\\\n\\\\nHere is the updated code with the chosen values for x and y:\\\\n\\\\n```python\\\\nfrom sympy import symbols, Eq, solve\\\\n\\\\na, b, c, x, y = symbols(\\'a b c x y\\')\\\\n\\\\neq1 = Eq(a * x + b * y + c, x + 7)\\\\neq2 = Eq(a + b * x + c * y, 2 * x + 6 * y)\\\\neq3 = Eq(a * y + b + c * x, 4 * x + y)\\\\n\\\\nsolutions = solve((eq1, eq2, eq3), (a, b, c))\\\\n\\\\nchosen_x = 1\\\\nchosen_y = 0\\\\n\\\\na_value, b_value, c_value = solutions[a].subs(x, chosen_x).subs(y, chosen_y), solutions[b].subs(x, chosen_x).subs(y, chosen_y), solutions[c].subs(x, chosen_x).subs(y, chosen_y)\\\\nsum_abc = a_value + b_value + c_value\\\\nprint(sum_abc)\\\\n```\\\\n\\\\nPlease run the provided code again to find the sum of a, b, and c.\", \"role\": \"assistant\"}, {\"content\": \"exitcode: 0 (execution succeeded)\\\\nCode output: \\\\n7\\\\n\", \"role\": \"user\"}, {\"content\": \"Great! Now we have the sum of a, b, and c. The answer is:\\\\n\\\\n$$a + b + c = 7$$\\\\n\\\\nTERMINATE\", \"role\": \"assistant\"}]': {'created_at': [0, 1, 2, 3, 4], 'cost': [0.022019999999999998, 0.03305999999999999, 0.04019999999999999, 0.058589999999999996, 0.050969999999999994]}}\n" - ] - } - ], + "outputs": [], "source": [ - "print(autogen.ChatCompletion.logged_history)" + "print(user_proxy.chat_messages[assistant])" ] }, { @@ -400,7 +389,7 @@ "source": [ "import json\n", "\n", - "json.dump(autogen.ChatCompletion.logged_history, open(\"conversations.json\", \"w\"), indent=2)" + "json.dump(user_proxy.chat_messages[assistant], open(\"conversations.json\", \"w\"), indent=2)" ] } ], diff --git a/notebook/agentchat_langchain.ipynb b/notebook/agentchat_langchain.ipynb index a756bc3afd1..6a3a183e7bf 100644 --- a/notebook/agentchat_langchain.ipynb +++ b/notebook/agentchat_langchain.ipynb @@ -344,7 +344,7 @@ " generate_llm_config(read_file_tool),\n", " ],\n", " \"config_list\": config_list, # Assuming you have this defined elsewhere\n", - " \"request_timeout\": 120,\n", + " \"timeout\": 120,\n", "}\n", "\n", "user_proxy = autogen.UserProxyAgent(\n", @@ -697,7 +697,7 @@ "llm_config = {\n", " \"functions\": tools,\n", " \"config_list\": config_list, # Assuming you have this defined elsewhere\n", - " \"request_timeout\": 120,\n", + " \"timeout\": 120,\n", "}\n", "\n", "user_proxy = autogen.UserProxyAgent(\n", diff --git a/notebook/agentchat_lmm_llava.ipynb b/notebook/agentchat_lmm_llava.ipynb index a3a51d3abfb..d42fb53a0f5 100644 --- a/notebook/agentchat_lmm_llava.ipynb +++ b/notebook/agentchat_lmm_llava.ipynb @@ -93,16 +93,14 @@ "source": [ "import requests\n", "import json\n", - "import os\n", "from llava.conversation import default_conversation as conv\n", "from llava.conversation import Conversation\n", "\n", - "from typing import Any, Callable, Dict, List, Optional, Tuple, Type, Union\n", + "from typing import Dict, List, Optional, Tuple, Union\n", "\n", "import autogen\n", - "from autogen import AssistantAgent, Agent, UserProxyAgent, ConversableAgent\n", - "from termcolor import colored\n", - "import random" + "from autogen import AssistantAgent, Agent, ConversableAgent, OpenAIWrapper\n", + "from termcolor import colored" ] }, { @@ -650,7 +648,7 @@ " content = message.get(\"content\")\n", " if content is not None:\n", " if \"context\" in message:\n", - " content = oai.ChatCompletion.instantiate(\n", + " content = OpenAIWrapper.instantiate(\n", " content,\n", " message[\"context\"],\n", " self.llm_config and self.llm_config.get(\"allow_format_str_template\", 
False),\n", diff --git a/notebook/agentchat_planning.ipynb b/notebook/agentchat_planning.ipynb index b0fa24425b3..0d5679ce6a8 100644 --- a/notebook/agentchat_planning.ipynb +++ b/notebook/agentchat_planning.ipynb @@ -95,14 +95,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-07-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-4\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-07-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-4-32k\n", @@ -161,7 +161,7 @@ " name=\"assistant\",\n", " llm_config={\n", " \"temperature\": 0,\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"model\": \"gpt-4-0613\",\n", " \"config_list\": autogen.config_list_openai_aoai(exclude=\"aoai\"),\n", diff --git a/notebook/agentchat_qdrant_RetrieveChat.ipynb b/notebook/agentchat_qdrant_RetrieveChat.ipynb index 42a5cf82f4c..b05848c1c5d 100644 --- a/notebook/agentchat_qdrant_RetrieveChat.ipynb +++ b/notebook/agentchat_qdrant_RetrieveChat.ipynb @@ -95,14 +95,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", @@ -159,14 +159,12 @@ "from autogen.agentchat.contrib.qdrant_retrieve_user_proxy_agent import QdrantRetrieveUserProxyAgent\n", "from qdrant_client import QdrantClient\n", "\n", - "autogen.ChatCompletion.start_logging()\n", - "\n", "# 1. 
create an RetrieveAssistantAgent instance named \"assistant\"\n", "assistant = RetrieveAssistantAgent(\n", " name=\"assistant\", \n", " system_message=\"You are a helpful assistant.\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " },\n", diff --git a/notebook/agentchat_stream.ipynb b/notebook/agentchat_stream.ipynb index cfce4de54f9..35666f03ecd 100644 --- a/notebook/agentchat_stream.ipynb +++ b/notebook/agentchat_stream.ipynb @@ -86,14 +86,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-4\n", " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # another Azure OpenAI API endpoint for gpt-4\n", @@ -104,14 +104,14 @@ " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # Azure OpenAI API endpoint for gpt-3.5-turbo\n", " {\n", " 'model': 'gpt-3.5-turbo',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # another Azure OpenAI API endpoint for gpt-3.5-turbo\n", @@ -228,7 +228,7 @@ "assistant = autogen.AssistantAgent(\n", " name=\"assistant\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 41,\n", " \"config_list\": config_list,\n", " \"temperature\": 0,\n", diff --git a/notebook/agentchat_teachability.ipynb b/notebook/agentchat_teachability.ipynb index 54f73fbcb83..66439fb91ea 100644 --- a/notebook/agentchat_teachability.ipynb +++ b/notebook/agentchat_teachability.ipynb @@ -95,14 +95,14 @@ " {\n", " 'model': 'gpt-4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", @@ -142,7 +142,7 @@ "from autogen import UserProxyAgent\n", "\n", "llm_config = {\n", - " \"request_timeout\": 60,\n", + " \"timeout\": 60,\n", " \"config_list\": config_list,\n", " \"use_cache\": True, # Use False to explore LLM non-determinism.\n", "}\n", @@ -160,8 +160,6 @@ " def colored(x, *args, **kwargs):\n", " return x\n", " \n", - "autogen.ChatCompletion.start_logging()\n", - "\n", "teachable_agent = TeachableAgent(\n", " name=\"teachableagent\",\n", " llm_config=llm_config,\n", diff --git a/notebook/agentchat_teaching.ipynb b/notebook/agentchat_teaching.ipynb index 0910aefa3e1..dcee7cd5b02 100644 --- a/notebook/agentchat_teaching.ipynb +++ b/notebook/agentchat_teaching.ipynb @@ -58,14 +58,14 @@ " {\n", " \"model\": \"gpt-4\",\n", " \"api_key\": \"\",\n", - " \"api_base\": \"\",\n", + " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", " \"api_version\": \"2023-06-01-preview\"\n", " },\n", " {\n", " \"model\": \"gpt-4-32k\",\n", " \"api_key\": \"\",\n", - " \"api_base\": \"\",\n", + " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", " \"api_version\": \"2023-06-01-preview\"\n", " }\n", @@ -84,7 +84,7 @@ "import autogen\n", "\n", "llm_config={\n", - " \"request_timeout\": 600,\n", 
+ " \"timeout\": 600,\n", " \"seed\": 44, # change the seed for different trials\n", " \"config_list\": autogen.config_list_from_json(\n", " \"OAI_CONFIG_LIST\",\n", diff --git a/notebook/agentchat_two_users.ipynb b/notebook/agentchat_two_users.ipynb index 22cb38db8ef..355693ac047 100644 --- a/notebook/agentchat_two_users.ipynb +++ b/notebook/agentchat_two_users.ipynb @@ -68,14 +68,14 @@ " {\n", " \"model\": \"gpt-4\",\n", " \"api_key\": \"\",\n", - " \"api_base\": \"\",\n", + " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", " \"api_version\": \"2023-07-01-preview\"\n", " },\n", " {\n", " \"model\": \"gpt-4-32k\",\n", " \"api_key\": \"\",\n", - " \"api_base\": \"\",\n", + " \"base_url\": \"\",\n", " \"api_type\": \"azure\",\n", " \"api_version\": \"2023-07-01-preview\"\n", " }\n", @@ -160,7 +160,7 @@ " name=\"assistant_for_student\",\n", " system_message=\"You are a helpful assistant. Reply TERMINATE when the task is done.\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " # Excluding azure openai endpoints from the config list.\n", " # Change to `exclude=\"openai\"` to exclude openai endpoints, or remove the `exclude` argument to include both.\n", diff --git a/notebook/agentchat_web_info.ipynb b/notebook/agentchat_web_info.ipynb index 6f202b19d2e..221462edc02 100644 --- a/notebook/agentchat_web_info.ipynb +++ b/notebook/agentchat_web_info.ipynb @@ -78,7 +78,7 @@ ")\n", "\n", "llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " \"temperature\": 0,\n", @@ -102,14 +102,14 @@ " {\n", " 'model': 'gpt4',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", " {\n", " 'model': 'gpt-4-32k-0314',\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " },\n", diff --git a/notebook/oai_chatgpt_gpt4.ipynb b/notebook/oai_chatgpt_gpt4.ipynb index 9f273db8b5c..921f1279063 100644 --- a/notebook/oai_chatgpt_gpt4.ipynb +++ b/notebook/oai_chatgpt_gpt4.ipynb @@ -51,7 +51,7 @@ }, "outputs": [], "source": [ - "# %pip install \"pyautogen[blendsearch]\" datasets" + "# %pip install \"pyautogen[blendsearch]<0.2\" datasets" ] }, { @@ -124,13 +124,13 @@ " {'api_key': ''}, # only if OpenAI API key is found\n", " {\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # only if the at least one Azure OpenAI API key is found\n", " {\n", " 'api_key': '',\n", - " 'api_base': '',\n", + " 'base_url': '',\n", " 'api_type': 'azure',\n", " 'api_version': '2023-06-01-preview',\n", " }, # only if the second Azure OpenAI API key is found\n", diff --git a/notebook/oai_completion.ipynb b/notebook/oai_completion.ipynb index 1d93140a561..f25db693fb8 100644 --- a/notebook/oai_completion.ipynb +++ b/notebook/oai_completion.ipynb @@ -91,13 +91,13 @@ "# }, # OpenAI API endpoint for gpt-4\n", "# {\n", "# 'api_key': '',\n", - "# 'api_base': '',\n", + "# 'base_url': '',\n", "# 'api_type': 'azure',\n", "# 'api_version': '2023-03-15-preview',\n", "# }, # Azure OpenAI API endpoint for gpt-4\n", "# {\n", "# 'api_key': '',\n", - "# 'api_base': '',\n", + "# 'base_url': '',\n", "# 'api_type': 'azure',\n", "# 'api_version': '2023-03-15-preview',\n", "# }, # another Azure OpenAI API endpoint for gpt-4\n", @@ -125,14 +125,14 @@ 
"# {\n", "# 'model': 'gpt-3.5-turbo',\n", "# 'api_key': '',\n", - "# 'api_base': '',\n", + "# 'base_url': '',\n", "# 'api_type': 'azure',\n", "# 'api_version': '2023-06-01-preview',\n", "# }, # Azure OpenAI API endpoint for gpt-3.5-turbo\n", "# {\n", "# 'model': 'gpt-35-turbo-v0301',\n", "# 'api_key': '',\n", - "# 'api_base': '',\n", + "# 'base_url': '',\n", "# 'api_type': 'azure',\n", "# 'api_version': '2023-06-01-preview',\n", "# }, # another Azure OpenAI API endpoint for gpt-3.5-turbo with deployment name gpt-35-turbo-v0301\n", diff --git a/notebook/oai_openai_utils.ipynb b/notebook/oai_openai_utils.ipynb index 38a5accb998..24973e3d908 100644 --- a/notebook/oai_openai_utils.ipynb +++ b/notebook/oai_openai_utils.ipynb @@ -38,7 +38,7 @@ "assistant = AssistantAgent(\n", " name=\"assistant\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": config_list,\n", " \"temperature\": 0,\n", @@ -88,13 +88,13 @@ "outputs": [], "source": [ "api_keys = [\"YOUR_OPENAI_API_KEY\"]\n", - "api_bases = None # You can specify API base URLs if needed. eg: localhost:8000\n", + "base_urls = None # You can specify API base URLs if needed. eg: localhost:8000\n", "api_type = \"openai\" # Type of API, e.g., \"openai\" or \"aoai\".\n", "api_version = None # Specify API version if needed.\n", "\n", "config_list = autogen.get_config_list(\n", " api_keys,\n", - " api_bases=api_bases,\n", + " base_urls=base_urls,\n", " api_type=api_type,\n", " api_version=api_version\n", ")\n", @@ -226,7 +226,7 @@ "assistant_one = autogen.AssistantAgent(\n", " name=\"3.5-assistant\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": cheap_config_list,\n", " \"temperature\": 0,\n", @@ -237,7 +237,7 @@ "assistant_two = autogen.AssistantAgent(\n", " name=\"4-assistant\",\n", " llm_config={\n", - " \"request_timeout\": 600,\n", + " \"timeout\": 600,\n", " \"seed\": 42,\n", " \"config_list\": costly_config_list,\n", " \"temperature\": 0,\n", @@ -454,7 +454,7 @@ "text/plain": [ "[{'api_key': 'sk-*********************', 'model': 'gpt-4'},\n", " {'api_key': '1234567890234567890',\n", - " 'api_base': 'https://api.someotherapi.com',\n", + " 'base_url': 'https://api.someotherapi.com',\n", " 'api_type': 'aoai',\n", " 'api_version': 'v2',\n", " 'model': 'gpt-3.5-turbo'}]" @@ -474,7 +474,7 @@ " \"api_key_env_var\": \"ANOTHER_API_KEY\",\n", " \"api_type\": \"aoai\",\n", " \"api_version\": \"v2\",\n", - " \"api_base\": \"https://api.someotherapi.com\"\n", + " \"base_url\": \"https://api.someotherapi.com\"\n", " }\n", " },\n", " filter_dict={\n", diff --git a/setup.py b/setup.py index d3308bf1ccc..b9977ed5ec1 100644 --- a/setup.py +++ b/setup.py @@ -14,11 +14,12 @@ __version__ = version["__version__"] install_requires = [ - "openai<1", + "openai==1.0.0b3", "diskcache", "termcolor", "flaml", "python-dotenv", + "tiktoken", ] @@ -39,25 +40,17 @@ install_requires=install_requires, extras_require={ "test": [ - "chromadb", - "lancedb", "coverage>=5.3", - "datasets", "ipykernel", "nbconvert", "nbformat", "pre-commit", - "pydantic==1.10.9", "pytest-asyncio", "pytest>=6.1.1", - "sympy", - "tiktoken", - "wolframalpha", - "qdrant_client[fastembed]", ], "blendsearch": ["flaml[blendsearch]"], "mathchat": ["sympy", "pydantic==1.10.9", "wolframalpha"], - "retrievechat": ["chromadb", "tiktoken", "sentence_transformers", "pypdf", "ipython"], + "retrievechat": ["chromadb", "sentence_transformers", "pypdf", "ipython"], 
"teachable": ["chromadb"], }, classifiers=[ diff --git a/test/agentchat/chat_with_teachable_agent.py b/test/agentchat/chat_with_teachable_agent.py index 211ebe59097..f11635a8c68 100644 --- a/test/agentchat/chat_with_teachable_agent.py +++ b/test/agentchat/chat_with_teachable_agent.py @@ -26,7 +26,7 @@ def create_teachable_agent(reset_db=False): config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) teachable_agent = TeachableAgent( name="teachableagent", - llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache}, + llm_config={"config_list": config_list, "timeout": 120, "use_cache": use_cache}, teach_config={ "verbosity": verbosity, "reset_db": reset_db, diff --git a/test/agentchat/test_qdrant_retrievechat.py b/test/agentchat/contrib/test_qdrant_retrievechat.py similarity index 80% rename from test/agentchat/test_qdrant_retrievechat.py rename to test/agentchat/contrib/test_qdrant_retrievechat.py index 9600b507e04..1d3c5afd6af 100644 --- a/test/agentchat/test_qdrant_retrievechat.py +++ b/test/agentchat/contrib/test_qdrant_retrievechat.py @@ -1,10 +1,11 @@ import os - +import sys import pytest - from autogen.agentchat.contrib.retrieve_assistant_agent import RetrieveAssistantAgent -from autogen import ChatCompletion, config_list_from_json -from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST +from autogen import config_list_from_json + +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) +from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402 try: from qdrant_client import QdrantClient @@ -19,32 +20,34 @@ except ImportError: QDRANT_INSTALLED = False -test_dir = os.path.join(os.path.dirname(__file__), "..", "test_files") +try: + import openai + + OPENAI_INSTALLED = True +except ImportError: + OPENAI_INSTALLED = False +test_dir = os.path.join(os.path.dirname(__file__), "../..", "test_files") -@pytest.mark.skipif(not QDRANT_INSTALLED, reason="qdrant_client is not installed") -def test_retrievechat(): - try: - import openai - except ImportError: - return +@pytest.mark.skipif( + sys.platform in ["darwin", "win32"] or not QDRANT_INSTALLED or not OPENAI_INSTALLED, + reason="do not run on MacOS or windows or dependency is not installed", +) +def test_retrievechat(): conversations = {} - ChatCompletion.start_logging(conversations) + # ChatCompletion.start_logging(conversations) # deprecated in v0.2 config_list = config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={ - "model": ["gpt-4", "gpt4", "gpt-4-32k", "gpt-4-32k-0314"], - }, ) assistant = RetrieveAssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ - "request_timeout": 600, + "timeout": 600, "seed": 42, "config_list": config_list, }, diff --git a/test/agentchat/test_retrievechat.py b/test/agentchat/contrib/test_retrievechat.py similarity index 82% rename from test/agentchat/test_retrievechat.py rename to test/agentchat/contrib/test_retrievechat.py index d71d146194b..d701ebc532e 100644 --- a/test/agentchat/test_retrievechat.py +++ b/test/agentchat/contrib/test_retrievechat.py @@ -1,9 +1,13 @@ import pytest +import os import sys import autogen -from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST + +sys.path.append(os.path.join(os.path.dirname(__file__), "..")) +from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST # noqa: E402 try: + import openai from autogen.agentchat.contrib.retrieve_assistant_agent import ( RetrieveAssistantAgent, ) @@ -20,30 +24,22 @@ 
@pytest.mark.skipif( sys.platform in ["darwin", "win32"] or skip_test, - reason="do not run on MacOS or windows", + reason="do not run on MacOS or windows or dependency is not installed", ) def test_retrievechat(): - try: - import openai - except ImportError: - return - conversations = {} - autogen.ChatCompletion.start_logging(conversations) + # autogen.ChatCompletion.start_logging(conversations) # deprecated in v0.2 config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, file_location=KEY_LOC, - filter_dict={ - "model": ["gpt-4", "gpt4", "gpt-4-32k", "gpt-4-32k-0314"], - }, ) assistant = RetrieveAssistantAgent( name="assistant", system_message="You are a helpful assistant.", llm_config={ - "request_timeout": 600, + "timeout": 600, "seed": 42, "config_list": config_list, }, diff --git a/test/agentchat/test_assistant_agent.py b/test/agentchat/test_assistant_agent.py index 1239bde453b..61510a44f9c 100644 --- a/test/agentchat/test_assistant_agent.py +++ b/test/agentchat/test_assistant_agent.py @@ -20,7 +20,7 @@ def test_ai_user_proxy_agent(): return conversations = {} - autogen.ChatCompletion.start_logging(conversations) + # autogen.ChatCompletion.start_logging(conversations) config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, @@ -30,7 +30,7 @@ def test_ai_user_proxy_agent(): "assistant", system_message="You are a helpful assistant.", llm_config={ - "request_timeout": 600, + "timeout": 600, "seed": 42, "config_list": config_list, }, @@ -97,7 +97,10 @@ def test_gpt35(human_input_mode="NEVER", max_consecutive_auto_reply=5): "timeout": 60, }, llm_config=llm_config, - system_message="""Reply TERMINATE to end the conversation.""", + system_message="""Is code provided but not enclosed in ``` blocks? +If so, remind that code blocks need to be enclosed in ``` blocks. +Reply TERMINATE to end the conversation if the task is finished. Don't say appreciation. 
+If "Thank you" or "You\'re welcome" are said in the conversation, then say TERMINATE and that is your last message.""", ) user.initiate_chat(assistant, message="TERMINATE") # should terminate without sending any message @@ -119,9 +122,9 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re config_list = autogen.config_list_from_json(OAI_CONFIG_LIST, file_location=KEY_LOC) conversations = {} - autogen.ChatCompletion.start_logging(conversations) + # autogen.ChatCompletion.start_logging(conversations) llm_config = { - "request_timeout": 600, + "timeout": 600, "seed": 42, "config_list": config_list, } @@ -148,12 +151,12 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re ```""", ) print(conversations) - autogen.ChatCompletion.print_usage_summary() - autogen.ChatCompletion.start_logging(compact=False) + # autogen.ChatCompletion.print_usage_summary() + # autogen.ChatCompletion.start_logging(compact=False) user.send("""Execute temp.py""", assistant) - print(autogen.ChatCompletion.logged_history) - autogen.ChatCompletion.print_usage_summary() - autogen.ChatCompletion.stop_logging() + # print(autogen.ChatCompletion.logged_history) + # autogen.ChatCompletion.print_usage_summary() + # autogen.ChatCompletion.stop_logging() def test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10): @@ -184,7 +187,7 @@ def __init__(self, *args, **kwargs): def generate_init_message(self, question) -> str: return self._prompt.format(question=question) - autogen.ChatCompletion.start_logging() + # autogen.ChatCompletion.start_logging() assistant = AssistantAgent("assistant", llm_config={"temperature": 0, "config_list": config_list}) user = TSPUserProxyAgent( "user", @@ -193,14 +196,14 @@ def generate_init_message(self, question) -> str: max_consecutive_auto_reply=max_consecutive_auto_reply, ) user.initiate_chat(assistant, question=hard_questions[2]) - print(autogen.ChatCompletion.logged_history) - autogen.ChatCompletion.stop_logging() + # print(autogen.ChatCompletion.logged_history) + # autogen.ChatCompletion.stop_logging() if __name__ == "__main__": - test_gpt35() + # test_gpt35() # test_create_execute_script(human_input_mode="TERMINATE") # when GPT-4, i.e., the DEFAULT_MODEL, is used, conversation in the following test # should terminate in 2-3 rounds of interactions (because is_termination_msg should be true after 2-3 rounds) # although the max_consecutive_auto_reply is set to 10. 
- # test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10) + test_tsp(human_input_mode="NEVER", max_consecutive_auto_reply=10) diff --git a/test/agentchat/test_async.py b/test/agentchat/test_async.py index 9a806e6af40..ba332b25379 100644 --- a/test/agentchat/test_async.py +++ b/test/agentchat/test_async.py @@ -1,3 +1,4 @@ +import pytest import asyncio import autogen from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST @@ -44,6 +45,7 @@ def get_market_news(ind, ind_upper): return feeds_summary +@pytest.mark.asyncio async def test_stream(): try: import openai @@ -68,7 +70,7 @@ async def add_stock_price_data(): assistant = autogen.AssistantAgent( name="assistant", llm_config={ - "request_timeout": 600, + "timeout": 600, "seed": 41, "config_list": config_list, "temperature": 0, diff --git a/test/agentchat/test_async_get_human_input.py b/test/agentchat/test_async_get_human_input.py index cdc7ea2aa7e..fc8a8e31504 100644 --- a/test/agentchat/test_async_get_human_input.py +++ b/test/agentchat/test_async_get_human_input.py @@ -16,7 +16,7 @@ async def test_async_get_human_input(): assistant = autogen.AssistantAgent( name="assistant", max_consecutive_auto_reply=2, - llm_config={"request_timeout": 600, "seed": 41, "config_list": config_list, "temperature": 0}, + llm_config={"timeout": 600, "seed": 41, "config_list": config_list, "temperature": 0}, ) user_proxy = autogen.UserProxyAgent(name="user", human_input_mode="ALWAYS", code_execution_config=False) diff --git a/test/agentchat/test_conversable_agent.py b/test/agentchat/test_conversable_agent.py index 905e34bddd9..2d26027a34a 100644 --- a/test/agentchat/test_conversable_agent.py +++ b/test/agentchat/test_conversable_agent.py @@ -15,7 +15,7 @@ def conversable_agent(): def test_trigger(): agent = ConversableAgent("a0", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") - agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, human_input_mode="NEVER") + agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") agent.register_reply(agent1, lambda recipient, messages, sender, config: (True, "hello")) agent1.initiate_chat(agent, message="hi") assert agent1.last_message(agent)["content"] == "hello" @@ -53,7 +53,7 @@ def test_trigger(): def test_context(): agent = ConversableAgent("a0", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") - agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, human_input_mode="NEVER") + agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") agent1.send( { "content": "hello {name}", @@ -131,7 +131,7 @@ def test_generate_code_execution_reply(): def test_max_consecutive_auto_reply(): agent = ConversableAgent("a0", max_consecutive_auto_reply=2, llm_config=False, human_input_mode="NEVER") - agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, human_input_mode="NEVER") + agent1 = ConversableAgent("a1", max_consecutive_auto_reply=0, llm_config=False, human_input_mode="NEVER") assert agent.max_consecutive_auto_reply() == agent.max_consecutive_auto_reply(agent1) == 2 agent.update_max_consecutive_auto_reply(1) assert agent.max_consecutive_auto_reply() == agent.max_consecutive_auto_reply(agent1) == 1 @@ -159,8 +159,8 @@ def test_max_consecutive_auto_reply(): def test_conversable_agent(): - dummy_agent_1 = ConversableAgent(name="dummy_agent_1", human_input_mode="ALWAYS") - dummy_agent_2 = ConversableAgent(name="dummy_agent_2", 
human_input_mode="TERMINATE") + dummy_agent_1 = ConversableAgent(name="dummy_agent_1", llm_config=False, human_input_mode="ALWAYS") + dummy_agent_2 = ConversableAgent(name="dummy_agent_2", llm_config=False, human_input_mode="TERMINATE") # monkeypatch.setattr(sys, "stdin", StringIO("exit")) dummy_agent_1.receive("hello", dummy_agent_2) # receive a str @@ -212,7 +212,9 @@ def add_num(num_to_be_added): given_num = 10 return num_to_be_added + given_num - dummy_agent_2 = ConversableAgent(name="user_proxy", human_input_mode="TERMINATE", function_map={"add_num": add_num}) + dummy_agent_2 = ConversableAgent( + name="user_proxy", llm_config=False, human_input_mode="TERMINATE", function_map={"add_num": add_num} + ) messsages = [{"function_call": {"name": "add_num", "arguments": '{ "num_to_be_added": 5 }'}, "role": "assistant"}] # when sender is None, messages is provided @@ -221,7 +223,7 @@ def add_num(num_to_be_added): ), "generate_reply not working when sender is None" # when sender is provided, messages is None - dummy_agent_1 = ConversableAgent(name="dummy_agent_1", human_input_mode="ALWAYS") + dummy_agent_1 = ConversableAgent(name="dummy_agent_1", llm_config=False, human_input_mode="ALWAYS") dummy_agent_2._oai_messages[dummy_agent_1] = messsages assert ( dummy_agent_2.generate_reply(messages=None, sender=dummy_agent_1)["content"] == "15" diff --git a/test/test_function_call.py b/test/agentchat/test_function_call.py similarity index 92% rename from test/test_function_call.py rename to test/agentchat/test_function_call.py index a78ff131d82..ef2ad5cc3ee 100644 --- a/test/test_function_call.py +++ b/test/agentchat/test_function_call.py @@ -1,15 +1,16 @@ try: - import openai + from openai import OpenAI except ImportError: - openai = None + OpenAI = None import pytest +import asyncio import json import autogen from autogen.math_utils import eval_math_responses -from test_code import KEY_LOC +from test_assistant_agent import KEY_LOC -@pytest.mark.skipif(openai is None, reason="openai not installed") +@pytest.mark.skipif(OpenAI is None, reason="openai>=1 not installed") def test_eval_math_responses(): config_list = autogen.config_list_from_models( KEY_LOC, exclude="aoai", model_list=["gpt-4-0613", "gpt-3.5-turbo-0613", "gpt-3.5-turbo-16k"] @@ -35,8 +36,8 @@ def test_eval_math_responses(): }, }, ] - response = autogen.ChatCompletion.create( - config_list=config_list, + client = autogen.OpenAIWrapper(config_list=config_list) + response = client.create( messages=[ { "role": "user", @@ -46,10 +47,10 @@ def test_eval_math_responses(): functions=functions, ) print(response) - responses = autogen.ChatCompletion.extract_text_or_function_call(response) + responses = client.extract_text_or_function_call(response) print(responses[0]) - function_call = responses[0]["function_call"] - name, arguments = function_call["name"], json.loads(function_call["arguments"]) + function_call = responses[0].function_call + name, arguments = function_call.name, json.loads(function_call.arguments) assert name == "eval_math_responses" print(arguments["responses"]) # if isinstance(arguments["responses"], str): @@ -188,7 +189,7 @@ def get_number(): if __name__ == "__main__": - test_json_extraction() - test_execute_function() - test_a_execute_function() + # test_json_extraction() + # test_execute_function() + asyncio.run(test_a_execute_function()) test_eval_math_responses() diff --git a/test/test_with_openai.py b/test/agentchat/test_function_call_groupchat.py similarity index 92% rename from test/test_with_openai.py rename to 
test/agentchat/test_function_call_groupchat.py index ed63324be6a..fefb620ad1c 100644 --- a/test/test_with_openai.py +++ b/test/agentchat/test_function_call_groupchat.py @@ -1,13 +1,14 @@ import autogen import pytest import sys +from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST try: - import openai - - skip = False + from openai import OpenAI except ImportError: skip = True +else: + skip = False @pytest.mark.skipif( @@ -21,10 +22,11 @@ def get_random_number(): return random.randint(0, 100) config_list_gpt4 = autogen.config_list_from_json( - "OAI_CONFIG_LIST", + OAI_CONFIG_LIST, filter_dict={ "model": ["gpt-4", "gpt-4-0314", "gpt4", "gpt-4-32k", "gpt-4-32k-0314", "gpt-4-32k-v0314"], }, + file_location=KEY_LOC, ) llm_config = { "config_list": config_list_gpt4, diff --git a/test/agentchat/test_math_user_proxy_agent.py b/test/agentchat/test_math_user_proxy_agent.py index 7874c2168e4..2975e8d0f16 100644 --- a/test/agentchat/test_math_user_proxy_agent.py +++ b/test/agentchat/test_math_user_proxy_agent.py @@ -8,21 +8,23 @@ ) from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST +try: + from openai import OpenAI +except ImportError: + skip = True +else: + skip = False + @pytest.mark.skipif( - sys.platform in ["darwin", "win32"], + skip or sys.platform in ["darwin", "win32"], reason="do not run on MacOS or windows", ) def test_math_user_proxy_agent(): - try: - import openai - except ImportError: - return - from autogen.agentchat.assistant_agent import AssistantAgent conversations = {} - autogen.ChatCompletion.start_logging(conversations) + # autogen.ChatCompletion.start_logging(conversations) config_list = autogen.config_list_from_json( OAI_CONFIG_LIST, @@ -35,7 +37,7 @@ def test_math_user_proxy_agent(): "assistant", system_message="You are a helpful assistant.", llm_config={ - "request_timeout": 600, + "timeout": 600, "seed": 42, "config_list": config_list, }, diff --git a/test/agentchat/test_teachable_agent.py b/test/agentchat/test_teachable_agent.py index 7a3367dbd72..5a5be0379a1 100644 --- a/test/agentchat/test_teachable_agent.py +++ b/test/agentchat/test_teachable_agent.py @@ -1,14 +1,15 @@ try: - import openai - - skip = False + from openai import OpenAI + from autogen.agentchat.contrib.teachable_agent import TeachableAgent except ImportError: skip = True +else: + skip = False + import pytest import sys from autogen import ConversableAgent, config_list_from_json -from autogen.agentchat.contrib.teachable_agent import TeachableAgent - +from test_assistant_agent import OAI_CONFIG_LIST, KEY_LOC try: from termcolor import colored @@ -24,7 +25,8 @@ def colored(x, *args, **kwargs): assert_on_error = False # GPT-4 nearly always succeeds on these unit tests, but GPT-3.5 is a bit less reliable. recall_threshold = 1.5 # Higher numbers allow more (but less relevant) memos to be recalled. -use_cache = False # If True, cached LLM calls will be skipped and responses pulled from cache. False exposes LLM non-determinism. +seed = None +# If int, cached LLM calls will be skipped and responses pulled from cache. None exposes LLM non-determinism. # Specify the model to use by uncommenting one of the following lines. 
# filter_dict={"model": ["gpt-4-0613"]} @@ -38,10 +40,10 @@ def create_teachable_agent(reset_db=False, verbosity=0): # Load LLM inference endpoints from an env variable or a file # See https://microsoft.github.io/autogen/docs/FAQ#set-your-api-endpoints # and OAI_CONFIG_LIST_sample - config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) + config_list = config_list_from_json(env_or_file=OAI_CONFIG_LIST, filter_dict=filter_dict, file_location=KEY_LOC) teachable_agent = TeachableAgent( name="teachableagent", - llm_config={"config_list": config_list, "request_timeout": 120, "use_cache": use_cache}, + llm_config={"config_list": config_list, "timeout": 120, "seed": seed}, teach_config={ "verbosity": verbosity, "reset_db": reset_db, @@ -137,8 +139,8 @@ def use_task_advice_pair_phrasing(): @pytest.mark.skipif( - skip or not sys.version.startswith("3.9"), - reason="do not run if openai is not installed or py!=3.9", + skip or not sys.version.startswith("3.11"), + reason="do not run if dependency is not installed or py!=3.11", ) def test_all(): """Runs this file's unit tests.""" diff --git a/test/oai/test_completion.py b/test/oai/_test_completion.py similarity index 99% rename from test/oai/test_completion.py rename to test/oai/_test_completion.py index b6cb5c31b1c..b13cae51578 100644 --- a/test/oai/test_completion.py +++ b/test/oai/_test_completion.py @@ -13,9 +13,8 @@ generate_code, ) from autogen.math_utils import eval_math_responses, solve_problem +from test.oai.test_utils import KEY_LOC, OAI_CONFIG_LIST -KEY_LOC = "notebook" -OAI_CONFIG_LIST = "OAI_CONFIG_LIST" here = os.path.abspath(os.path.dirname(__file__)) @@ -272,7 +271,7 @@ def test_humaneval(num_samples=1): ) response = autogen.ChatCompletion.create(context=test_data[0], config_list=config_list, **config) print(response) - from openai.error import RateLimitError + from openai import RateLimitError try: code, cost, selected = implement(tune_data[1], [{**config_list[-1], **config}]) diff --git a/test/oai/test_client.py b/test/oai/test_client.py new file mode 100644 index 00000000000..c74332588b6 --- /dev/null +++ b/test/oai/test_client.py @@ -0,0 +1,50 @@ +import pytest +from autogen import OpenAIWrapper, config_list_from_json, config_list_openai_aoai +from test_utils import OAI_CONFIG_LIST, KEY_LOC + +try: + from openai import OpenAI +except ImportError: + skip = True +else: + skip = False + + +@pytest.mark.skipif(skip, reason="openai>=1 not installed") +def test_aoai_chat_completion(): + config_list = config_list_from_json( + env_or_file=OAI_CONFIG_LIST, + file_location=KEY_LOC, + filter_dict={"api_type": ["azure"], "model": ["gpt-3.5-turbo"]}, + ) + client = OpenAIWrapper(config_list=config_list) + response = client.create(messages=[{"role": "user", "content": "2+2="}]) + print(response) + print(client.extract_text_or_function_call(response)) + + +@pytest.mark.skipif(skip, reason="openai>=1 not installed") +def test_chat_completion(): + config_list = config_list_from_json( + env_or_file=OAI_CONFIG_LIST, + file_location=KEY_LOC, + ) + client = OpenAIWrapper(config_list=config_list) + response = client.create(messages=[{"role": "user", "content": "1+1="}]) + print(response) + print(client.extract_text_or_function_call(response)) + + +@pytest.mark.skipif(skip, reason="openai>=1 not installed") +def test_completion(): + config_list = config_list_openai_aoai(KEY_LOC) + client = OpenAIWrapper(config_list=config_list) + response = client.create(prompt="1+1=", model="gpt-3.5-turbo-instruct") + print(response) 
+ print(client.extract_text_or_function_call(response)) + + +if __name__ == "__main__": + test_aoai_chat_completion() + test_chat_completion() + test_completion() diff --git a/test/oai/test_utils.py b/test/oai/test_utils.py index 8813ff0d7ae..579fc6f9d8a 100644 --- a/test/oai/test_utils.py +++ b/test/oai/test_utils.py @@ -5,10 +5,12 @@ import logging import tempfile from unittest import mock -from test_completion import KEY_LOC, OAI_CONFIG_LIST +import autogen # noqa: E402 + +KEY_LOC = "notebook" +OAI_CONFIG_LIST = "OAI_CONFIG_LIST" sys.path.append("../../autogen") -import autogen # noqa: E402 # Example environment variables ENV_VARS = { @@ -24,7 +26,7 @@ "api_key_env_var": "ANOTHER_API_KEY", "api_type": "aoai", "api_version": "v2", - "api_base": "https://api.someotherapi.com", + "base_url": "https://api.someotherapi.com", }, } diff --git a/test/test_code.py b/test/test_code.py index 65c407cc316..32766d4d70c 100644 --- a/test/test_code.py +++ b/test/test_code.py @@ -284,7 +284,7 @@ def test_execute_code_no_docker(): assert image is None -def test_improve(): +def _test_improve(): try: import openai except ImportError: diff --git a/test/test_notebook.py b/test/test_notebook.py index 68d3052750f..0c0adf1cb33 100644 --- a/test/test_notebook.py +++ b/test/test_notebook.py @@ -45,8 +45,8 @@ def run_notebook(input_nb, output_nb="executed_openai_notebook.ipynb", save=Fals @pytest.mark.skipif( - skip or not sys.version.startswith("3.10"), - reason="do not run if openai is not installed or py!=3.10", + skip or not sys.version.startswith("3.11"), + reason="do not run if openai is not installed or py!=3.11", ) def test_agentchat_auto_feedback_from_code(save=False): run_notebook("agentchat_auto_feedback_from_code_execution.ipynb", save=save) @@ -56,7 +56,7 @@ def test_agentchat_auto_feedback_from_code(save=False): skip or not sys.version.startswith("3.10"), reason="do not run if openai is not installed or py!=3.10", ) -def test_oai_completion(save=False): +def _test_oai_completion(save=False): run_notebook("oai_completion.ipynb", save=save) @@ -72,7 +72,7 @@ def test_agentchat_function_call(save=False): skip or not sys.version.startswith("3.10"), reason="do not run if openai is not installed or py!=3.10", ) -def test_agentchat_MathChat(save=False): +def _test_agentchat_MathChat(save=False): run_notebook("agentchat_MathChat.ipynb", save=save) @@ -80,7 +80,7 @@ def test_agentchat_MathChat(save=False): skip or not sys.version.startswith("3.11"), reason="do not run if openai is not installed or py!=3.11", ) -def test_oai_chatgpt_gpt4(save=False): +def _test_oai_chatgpt_gpt4(save=False): run_notebook("oai_chatgpt_gpt4.ipynb", save=save) diff --git a/test/test_retrieve_utils.py b/test/test_retrieve_utils.py index 0585b2a147d..2423c1e0a2e 100644 --- a/test/test_retrieve_utils.py +++ b/test/test_retrieve_utils.py @@ -1,21 +1,24 @@ """ Unit test for retrieve_utils.py """ - -from autogen.retrieve_utils import ( - split_text_to_chunks, - extract_text_from_pdf, - split_files_to_chunks, - get_files_from_dir, - is_url, - create_vector_db_from_dir, - query_vector_db, -) -from autogen.token_count_utils import count_token - +try: + import chromadb + from autogen.retrieve_utils import ( + split_text_to_chunks, + extract_text_from_pdf, + split_files_to_chunks, + get_files_from_dir, + is_url, + create_vector_db_from_dir, + query_vector_db, + ) + from autogen.token_count_utils import count_token +except ImportError: + skip = True +else: + skip = False import os import pytest -import chromadb test_dir = 
os.path.join(os.path.dirname(__file__), "test_files") @@ -25,6 +28,7 @@ integration, testing, and deployment.""" +@pytest.mark.skipif(skip, reason="dependency is not installed") class TestRetrieveUtils: def test_split_text_to_chunks(self): long_text = "A" * 10000 @@ -163,7 +167,9 @@ def custom_text_split_function(text): def test_retrieve_utils(self): client = chromadb.PersistentClient(path="/tmp/chromadb") - create_vector_db_from_dir(dir_path="./website/docs", client=client, collection_name="autogen-docs") + create_vector_db_from_dir( + dir_path="./website/docs", client=client, collection_name="autogen-docs", get_or_create=True + ) results = query_vector_db( query_texts=[ "How can I use AutoGen UserProxyAgent and AssistantAgent to do code generation?", diff --git a/website/blog/2023-07-14-Local-LLMs/index.mdx b/website/blog/2023-07-14-Local-LLMs/index.mdx index 136e05e453f..8c06ae1e199 100644 --- a/website/blog/2023-07-14-Local-LLMs/index.mdx +++ b/website/blog/2023-07-14-Local-LLMs/index.mdx @@ -64,7 +64,7 @@ class CompletionResponseStreamChoice(BaseModel): ``` -## Interact with model using `oai.Completion` +## Interact with model using `oai.Completion` (requires openai<1) Now the models can be directly accessed through openai-python library as well as `autogen.oai.Completion` and `autogen.oai.ChatCompletion`. @@ -77,7 +77,7 @@ response = oai.Completion.create( config_list=[ { "model": "chatglm2-6b", - "api_base": "http://localhost:8000/v1", + "base_url": "http://localhost:8000/v1", "api_type": "open_ai", "api_key": "NULL", # just a placeholder } @@ -91,7 +91,7 @@ response = oai.ChatCompletion.create( config_list=[ { "model": "chatglm2-6b", - "api_base": "http://localhost:8000/v1", + "base_url": "http://localhost:8000/v1", "api_type": "open_ai", "api_key": "NULL", } @@ -125,13 +125,13 @@ response = oai.ChatCompletion.create( config_list=[ { "model": "chatglm2-6b", - "api_base": "http://localhost:8000/v1", + "base_url": "http://localhost:8000/v1", "api_type": "open_ai", "api_key": "NULL", }, { "model": "vicuna-7b-v1.3", - "api_base": "http://localhost:8000/v1", + "base_url": "http://localhost:8000/v1", "api_type": "open_ai", "api_key": "NULL", } diff --git a/website/blog/2023-10-18-RetrieveChat/index.mdx b/website/blog/2023-10-18-RetrieveChat/index.mdx index 71d2ad3f46c..75fde3916fb 100644 --- a/website/blog/2023-10-18-RetrieveChat/index.mdx +++ b/website/blog/2023-10-18-RetrieveChat/index.mdx @@ -292,7 +292,7 @@ llm_config = { }, ], "config_list": config_list, - "request_timeout": 60, + "timeout": 60, "seed": 42, } diff --git a/website/blog/2023-10-26-TeachableAgent/index.mdx b/website/blog/2023-10-26-TeachableAgent/index.mdx index 434e0946b54..51c2e56a38b 100644 --- a/website/blog/2023-10-26-TeachableAgent/index.mdx +++ b/website/blog/2023-10-26-TeachableAgent/index.mdx @@ -51,7 +51,7 @@ from autogen.agentchat.contrib.teachable_agent import TeachableAgent # and OAI_CONFIG_LIST_sample filter_dict = {"model": ["gpt-4"]} # GPT-3.5 is less reliable than GPT-4 at learning from user feedback. config_list = config_list_from_json(env_or_file="OAI_CONFIG_LIST", filter_dict=filter_dict) -llm_config={"config_list": config_list, "request_timeout": 120} +llm_config={"config_list": config_list, "timeout": 120} ``` 4. Create the agents diff --git a/website/docs/Contribute.md b/website/docs/Contribute.md index 8f264c9bd73..7d41b8c906e 100644 --- a/website/docs/Contribute.md +++ b/website/docs/Contribute.md @@ -120,7 +120,7 @@ Tests are automatically run via GitHub actions. There are two workflows: 1. 
[build.yml](https://github.com/microsoft/autogen/blob/main/.github/workflows/build.yml) 1. [openai.yml](https://github.com/microsoft/autogen/blob/main/.github/workflows/openai.yml) -The first workflow is required to pass for all PRs. The second workflow is required for changes that affect the openai tests. The second workflow requires approval to run. When writing tests that require openai, please use [`pytest.mark.skipif`](https://github.com/microsoft/autogen/blob/a456b512d5a933ce9707ce51c465ea35a9dd180c/test/test_with_openai.py#L13) to make them run in one python version only when openai is installed. If additional dependency for this test is required, install the dependency in the corresponding python version in [openai.yml](https://github.com/microsoft/autogen/blob/main/.github/workflows/openai.yml). +The first workflow is required to pass for all PRs. The second workflow is required for changes that affect the openai tests. The second workflow requires approval to run. When writing tests that require openai, please use [`pytest.mark.skipif`](https://github.com/microsoft/autogen/blob/main/test/test_client.py#L13) to make them run in only one python version, and only when openai is installed. If an additional dependency is required for this test, install it in the corresponding python version in [openai.yml](https://github.com/microsoft/autogen/blob/main/.github/workflows/openai.yml). ### Coverage diff --git a/website/docs/Examples/AutoGen-AgentChat.md b/website/docs/Examples/AgentChat.md similarity index 95% rename from website/docs/Examples/AutoGen-AgentChat.md rename to website/docs/Examples/AgentChat.md index a9a813ae6c1..961c44d5836 100644 --- a/website/docs/Examples/AutoGen-AgentChat.md +++ b/website/docs/Examples/AgentChat.md @@ -1,4 +1,4 @@ -# AutoGen - Automated Multi Agent Chat +# Automated Multi Agent Chat AutoGen offers conversable agents powered by LLM, tool or human, which can be used to perform tasks collectively via automated chat. This framework allows tool use and human participation via multi-agent conversation. Please find documentation about this feature [here](/docs/Use-Cases/agent_chat). @@ -25,7 +25,7 @@ Links to notebook examples: - Automated Chess Game Playing & Chitchatting by GPT-4 Agents - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_chess.ipynb) - Automated Continual Learning from New Data - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_stream.ipynb) - - [OptiGuide](https://github.com/microsoft/optiguide) - Large Language Models for Supply Chain Optimization. + - [OptiGuide](https://github.com/microsoft/optiguide) - Coding, Tool Using, Safeguarding & Question Answering for Supply Chain Optimization 4. **Tool Use** diff --git a/website/docs/Examples/AutoGen-Inference.md b/website/docs/Examples/Inference.md similarity index 96% rename from website/docs/Examples/AutoGen-Inference.md rename to website/docs/Examples/Inference.md index d68504a1c7c..ad608985ec4 100644 --- a/website/docs/Examples/AutoGen-Inference.md +++ b/website/docs/Examples/Inference.md @@ -1,4 +1,4 @@ -# AutoGen - Tune GPT Models +# Tune GPT Models AutoGen also offers a cost-effective hyperparameter optimization technique [EcoOptiGen](https://arxiv.org/abs/2303.04673) for tuning Large Language Models. The research study finds that tuning hyperparameters can significantly improve the utility of them. Please find documentation about this feature [here](/docs/Use-Cases/enhanced_inference). 
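The skip pattern that the updated Contribute.md link points at is used throughout the test changes in this diff. A rough sketch of it, assuming openai>=1; the test name and body here are illustrative, not the exact contents of test/test_client.py:

```python
import sys
import pytest

try:
    from openai import OpenAI  # import style for openai>=1
except ImportError:
    skip = True
else:
    skip = False


@pytest.mark.skipif(
    skip or not sys.version.startswith("3.10"),
    reason="do not run if openai>=1 is not installed or py!=3.10",
)
def test_requires_openai():
    # body would exercise an LLM-dependent code path
    ...
```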
diff --git a/website/docs/FAQ.md b/website/docs/FAQ.md
index 6c87cac03d8..40babd05c1b 100644
--- a/website/docs/FAQ.md
+++ b/website/docs/FAQ.md
@@ -22,7 +22,7 @@ assistant = autogen.AssistantAgent(
 )
 ```

-The `llm_config` is used in the [`create`](/docs/reference/oai/completion#create) function for LLM inference.
+The `llm_config` is used in the [`create`](/docs/reference/oai/client#create) function for LLM inference.
 When `llm_config` is not provided, the agent will rely on other openai settings such as `openai.api_key` or the environment variable `OPENAI_API_KEY`, which can also work when you'd like to use a single endpoint.
 You can also explicitly specify that by:
 ```python
@@ -35,11 +35,10 @@ Yes. Please check https://microsoft.github.io/autogen/blog/2023/07/14/Local-LLMs

 ## Handle Rate Limit Error and Timeout Error

-You can set `retry_wait_time` and `max_retry_period` to handle rate limit error. And you can set `request_timeout` to handle timeout error. They can all be specified in `llm_config` for an agent, which will be used in the [`create`](/docs/reference/oai/completion#create) function for LLM inference.
+You can set `max_retries` to handle rate limit errors and `timeout` to handle timeout errors. Both can be specified in `llm_config` for an agent and will be used in the OpenAI client for LLM inference. They can be set differently for different clients if they are set in the `config_list`.

-- `retry_wait_time` (int): the time interval to wait (in seconds) before retrying a failed request.
-- `max_retry_period` (int): the total timeout (in seconds) allowed for retrying failed requests.
-- `request_timeout` (int): the timeout (in seconds) sent with a single request.
+- `max_retries` (int): the total number of times allowed for retrying failed requests for a single client.
+- `timeout` (int): the timeout (in seconds) for a single client.

 Please refer to the [documentation](/docs/Use-Cases/enhanced_inference#runtime-error) for more info.
diff --git a/website/docs/Getting-Started.md b/website/docs/Getting-Started.md
index c0e53aea5cd..63fc52f9455 100644
--- a/website/docs/Getting-Started.md
+++ b/website/docs/Getting-Started.md
@@ -8,11 +8,11 @@ AutoGen is a framework that enables development of LLM applications using multip

 ### Main Features

-* AutoGen enables building next-gen LLM applications based on **multi-agent conversations** with minimal effort. It simplifies the orchestration, automation and optimization of a complex LLM workflow. It maximizes the performance of LLM models and overcome their weaknesses.
-* It supports **diverse conversation patterns** for complex workflows. With customizable and conversable agents, developers can use AutoGen to build a wide range of conversation patterns concerning conversation autonomy,
+- AutoGen enables building next-gen LLM applications based on [multi-agent conversations](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat) with minimal effort. It simplifies the orchestration, automation, and optimization of a complex LLM workflow. It maximizes the performance of LLM models and overcomes their weaknesses.
+- It supports [diverse conversation patterns](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#supporting-diverse-conversation-patterns) for complex workflows. With customizable and conversable agents, developers can use AutoGen to build a wide range of conversation patterns concerning conversation autonomy,
   the number of agents, and agent conversation topology.
-* It provides a collection of working systems with different complexities. These systems span a **wide range of applications** from various domains and complexities. They demonstrate how AutoGen can easily support different conversation patterns.
-* AutoGen provides **enhanced LLM inference**. It offers easy performance tuning, plus utilities like API unification & caching, and advanced usage patterns, such as error handling, multi-config inference, context programming etc.
+- It provides a collection of working systems with different complexities. These systems span a [wide range of applications](https://microsoft.github.io/autogen/docs/Use-Cases/agent_chat#diverse-applications-implemented-with-autogen) from various domains and complexities. This demonstrates how AutoGen can easily support diverse conversation patterns.
+- AutoGen provides [enhanced LLM inference](https://microsoft.github.io/autogen/docs/Use-Cases/enhanced_inference#api-unification). It offers utilities like API unification and caching, and advanced usage patterns, such as error handling, multi-config inference, context programming, etc.

 AutoGen is powered by collaborative [research studies](/docs/Research) from Microsoft, Penn State University, and University of Washington.
@@ -40,13 +40,13 @@ user_proxy.initiate_chat(assistant, message="Plot a chart of NVDA and TESLA stoc
 The figure below shows an example conversation flow with AutoGen.
 ![Agent Chat Example](/img/chat_example.png)

-* [Code examples](/docs/Examples/AutoGen-AgentChat).
+* [Code examples](/docs/Examples/AgentChat).
 * [Documentation](/docs/Use-Cases/agent_chat).

 #### Enhanced LLM Inferences

 Autogen also helps maximize the utility out of the expensive LLMs such as ChatGPT and GPT-4. It offers enhanced LLM inference with powerful functionalities like tuning, caching, error handling, templating. For example, you can optimize generations by LLM with your own tuning data, success metrics and budgets.

 ```python
-# perform tuning
+# perform tuning for openai<1
 config, analysis = autogen.Completion.tune(
     data=tune_data,
     metric="success",
@@ -60,13 +60,13 @@ config, analysis = autogen.Completion.tune(
 response = autogen.Completion.create(context=test_instance, **config)
 ```

-* [Code examples](/docs/Examples/AutoGen-Inference).
+* [Code examples](/docs/Examples/Inference).
 * [Documentation](/docs/Use-Cases/enhanced_inference).

 ### Where to Go Next ?

 * Understand the use cases for [multi-agent conversation](/docs/Use-Cases/agent_chat) and [enhanced LLM inference](/docs/Use-Cases/enhanced_inference).
-* Find [code examples](/docs/Examples/AutoGen-AgentChat).
+* Find [code examples](/docs/Examples/AgentChat).
 * Read [SDK](/docs/reference/agentchat/conversable_agent/).
 * Learn about [research](/docs/Research) around AutoGen.
 * [Roadmap](https://github.com/orgs/microsoft/projects/989/views/3)
diff --git a/website/docs/Installation.md b/website/docs/Installation.md
index 2cacceda2c0..508f207765b 100644
--- a/website/docs/Installation.md
+++ b/website/docs/Installation.md
@@ -35,7 +35,7 @@ Now, you're ready to install AutoGen in the virtual environment you've just crea

 ## Python

-AutoGen requires **Python version >= 3.8**. It can be installed from pip:
+AutoGen requires **Python version >= 3.8, < 3.12**. It can be installed from pip:

 ```bash
 pip install pyautogen
@@ -49,6 +49,24 @@ or conda:
 conda install pyautogen -c conda-forge
 ``` -->

+### Migration guide to v0.2
+
+openai v1 is a total rewrite of the library with many breaking changes. For example, inference now requires instantiating a client instead of calling a global class method.
+Therefore, some changes are required for users of `pyautogen<0.2`.
+
+- `api_base` -> `base_url`, `request_timeout` -> `timeout` in `llm_config` and `config_list`. `max_retry_period` and `retry_wait_time` are deprecated. `max_retries` can be set for each client.
+- MathChat and TeachableAgent are unsupported until they are tested in a future release.
+- `autogen.Completion` and `autogen.ChatCompletion` are deprecated. The essential functionalities are moved to `autogen.OpenAIWrapper`:
+```python
+from autogen import OpenAIWrapper
+client = OpenAIWrapper(config_list=config_list)
+response = client.create(messages=[{"role": "user", "content": "2+2="}])
+print(client.extract_text_or_function_call(response))
+```
+- Inference parameter tuning and inference logging features are currently unavailable in `OpenAIWrapper`. Logging will be added in a future release.
+Inference parameter tuning can be done via [`flaml.tune`](https://microsoft.github.io/FLAML/docs/Use-Cases/Tune-User-Defined-Function).
+- `use_cache` is removed as a kwarg in `OpenAIWrapper.create()`; caching is now controlled automatically by the `seed` argument (int | None).
+
 ### Optional Dependencies
 * docker

@@ -61,9 +79,9 @@ pip install docker

 * blendsearch

-AutoGen offers a cost-effective hyperparameter optimization technique [EcoOptiGen](https://arxiv.org/abs/2303.04673) for tuning Large Language Models. Please install with the [blendsearch] option to use it.
+`pyautogen<0.2` offers a cost-effective hyperparameter optimization technique [EcoOptiGen](https://arxiv.org/abs/2303.04673) for tuning Large Language Models. Please install with the [blendsearch] option to use it.
 ```bash
-pip install "pyautogen[blendsearch]"
+pip install "pyautogen[blendsearch]<0.2"
 ```

 Example notebooks:
@@ -72,9 +90,9 @@ Example notebooks:

 * retrievechat

-AutoGen supports retrieval-augmented generation tasks such as question answering and code generation with RAG agents. Please install with the [retrievechat] option to use it.
+`pyautogen<0.2` supports retrieval-augmented generation tasks such as question answering and code generation with RAG agents. Please install with the [retrievechat] option to use it.
 ```bash
-pip install "pyautogen[retrievechat]"
+pip install "pyautogen[retrievechat]<0.2"
 ```

 Example notebooks:
@@ -83,9 +101,9 @@ Example notebooks:

 * mathchat

-AutoGen offers an experimental agent for math problem solving. Please install with the [mathchat] option to use it.
+`pyautogen<0.2` offers an experimental agent for math problem solving. Please install with the [mathchat] option to use it.
 ```bash
-pip install "pyautogen[mathchat]"
+pip install "pyautogen[mathchat]<0.2"
 ```

 Example notebooks:
diff --git a/website/docs/Use-Cases/agent_chat.md b/website/docs/Use-Cases/agent_chat.md
index 9062c1b3690..d834b7f3248 100644
--- a/website/docs/Use-Cases/agent_chat.md
+++ b/website/docs/Use-Cases/agent_chat.md
@@ -99,7 +99,7 @@ The figure below shows six examples of applications built using AutoGen.
   - Automated Chess Game Playing & Chitchatting by GPT-4 Agents - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_chess.ipynb)
   - Automated Continual Learning from New Data - [View Notebook](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_stream.ipynb)
-  - [OptiGuide](https://github.com/microsoft/optiguide) - Large Language Models for Supply Chain Optimization.
+  - [OptiGuide](https://github.com/microsoft/optiguide) - Coding, Tool Using, Safeguarding & Question Answering for Supply Chain Optimization

 4. **Tool Use**
diff --git a/website/docs/Use-Cases/enhanced_inference.md b/website/docs/Use-Cases/enhanced_inference.md
index 8b5dd5c694e..5b5a1e81101 100644
--- a/website/docs/Use-Cases/enhanced_inference.md
+++ b/website/docs/Use-Cases/enhanced_inference.md
@@ -1,9 +1,10 @@
 # Enhanced Inference

-`autogen.Completion` is a drop-in replacement of `openai.Completion` and `openai.ChatCompletion` for enhanced LLM inference.
+`autogen.OpenAIWrapper` provides enhanced LLM inference for `openai>=1`.
+`autogen.Completion` is a drop-in replacement of `openai.Completion` and `openai.ChatCompletion` for enhanced LLM inference using `openai<1`.

 There are a number of benefits of using `autogen` to perform inference: performance tuning, API unification, caching, error handling, multi-config inference, result filtering, templating and so on.

-## Tune Inference Parameters
+## Tune Inference Parameters (for openai<1)

 *Links to notebook examples:*
 * [Optimize for Code Generation](https://github.com/microsoft/autogen/blob/main/notebook/oai_completion.ipynb)
@@ -108,69 +109,103 @@ The tuned config can be used to perform inference.

 ## API unification

-`autogen.Completion.create` is compatible with both `openai.Completion.create` and `openai.ChatCompletion.create`, and both OpenAI API and Azure OpenAI API. So models such as "text-davinci-003", "gpt-3.5-turbo" and "gpt-4" can share a common API.
-When chat models are used and `prompt` is given as the input to `autogen.Completion.create`, the prompt will be automatically converted into `messages` to fit the chat completion API requirement. One advantage is that one can experiment with both chat and non-chat models for the same prompt in a unified API.
+
+`autogen.OpenAIWrapper.create()` can be used to create completions for both chat and non-chat models, and both OpenAI API and Azure OpenAI API.
+
+```python
+from autogen import OpenAIWrapper
+# OpenAI endpoint
+client = OpenAIWrapper()
+# ChatCompletion
+response = client.create(messages=[{"role": "user", "content": "2+2="}], model="gpt-3.5-turbo")
+# extract the response text
+print(client.extract_text_or_function_call(response))
+# Azure OpenAI endpoint
+client = OpenAIWrapper(api_key=..., base_url=..., api_version=..., api_type="azure")
+# Completion
+response = client.create(prompt="2+2=", model="gpt-3.5-turbo-instruct")
+# extract the response text
+print(client.extract_text_or_function_call(response))
+```

 For local LLMs, one can spin up an endpoint using a package like [FastChat](https://github.com/lm-sys/FastChat), and then use the same API to send a request. See [here](/blog/2023/07/14/Local-LLMs) for examples on how to make inference with local LLMs.

-When only working with the chat-based models, `autogen.ChatCompletion` can be used. It also does automatic conversion from prompt to messages, if prompt is provided instead of messages.
+
 ## Caching

-API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. It still allows controlled randomness by setting the "seed", using `set_cache` or specifying in `create()`.
+API call results are cached locally and reused when the same request is issued. This is useful when repeating or continuing experiments for reproducibility and cost saving. Controlled randomness is still possible via the `seed` argument, which can be passed either to `OpenAIWrapper.create()` or to the `OpenAIWrapper` constructor.
+
+```python
+client = OpenAIWrapper(seed=...)
+client.create(...)
+```
+
+```python
+client = OpenAIWrapper()
+client.create(seed=..., ...)
+```
+
+Caching is enabled by default with seed 41. To disable it, set `seed` to None.

 ## Error handling

 ### Runtime error

-It is easy to hit error when calling OpenAI APIs, due to connection, rate limit, or timeout. Some of the errors are transient. `autogen.Completion.create` deals with the transient errors and retries automatically. Request timeout, max retry period and retry wait time can be configured via `request_timeout`, `max_retry_period` and `retry_wait_time`.
+
 One can pass a list of configurations of different models/endpoints to mitigate rate limits and other runtime errors. For example,

 ```python
-response = autogen.Completion.create(
+client = OpenAIWrapper(
     config_list=[
         {
             "model": "gpt-4",
             "api_key": os.environ.get("AZURE_OPENAI_API_KEY"),
             "api_type": "azure",
-            "api_base": os.environ.get("AZURE_OPENAI_API_BASE"),
-            "api_version": "2023-07-01-preview",
+            "base_url": os.environ.get("AZURE_OPENAI_API_BASE"),
+            "api_version": "2023-08-01-preview",
         },
         {
             "model": "gpt-3.5-turbo",
             "api_key": os.environ.get("OPENAI_API_KEY"),
-            "api_type": "open_ai",
-            "api_base": "https://api.openai.com/v1",
-            "api_version": None,
+            "base_url": "https://api.openai.com/v1",
         },
         {
-            "model": "llama-7B",
-            "api_base": "http://127.0.0.1:8080",
-            "api_type": "open_ai",
-            "api_version": None,
+            "model": "llama2-chat-7B",
+            "base_url": "http://127.0.0.1:8080",
         }
     ],
-    prompt="Hi",
 )
 ```

-It will try querying Azure OpenAI gpt-4, OpenAI gpt-3.5-turbo, and a locally hosted llama-7B one by one, ignoring AuthenticationError, RateLimitError and Timeout,
+`client.create()` will try querying Azure OpenAI gpt-4, OpenAI gpt-3.5-turbo, and a locally hosted llama2-chat-7B one by one,
 until a valid result is returned. This can speed up the development process where the rate limit is a bottleneck. An error will be raised if the last choice fails. So make sure the last choice in the list has the best availability.

-For convenience, we provide a number of utility functions to load config lists, such as [`config_list_from_json`](/docs/reference/oai/openai_utils#config_list_from_json): The config list like the list of dicts above can be saved in an environment variable or a file in json format and loaded with this function.
+For convenience, we provide a number of utility functions to load config lists.
+- `get_config_list`: Generates configurations for API calls, primarily from provided API keys.
+- `config_list_openai_aoai`: Constructs a list of configurations using both Azure OpenAI and OpenAI endpoints, sourcing API keys from environment variables or local files.
+- `config_list_from_json`: Loads configurations from a JSON structure, either from an environment variable or a local JSON file, with the flexibility of filtering configurations based on given criteria.
+- `config_list_from_models`: Creates configurations based on a provided list of models, useful when targeting specific models without manually specifying each configuration.
+- `config_list_from_dotenv`: Constructs a configuration list from a `.env` file, offering a consolidated way to manage multiple API configurations and keys from a single file.
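+
+For example, a config list can be loaded from the `OAI_CONFIG_LIST` environment variable (or a JSON file of that name) and filtered by model before being handed to the client. A minimal sketch; the filter value is illustrative:
+
+```python
+import autogen
+from autogen import OpenAIWrapper
+
+# load configurations, keeping only the gpt-4 entries
+config_list = autogen.config_list_from_json(
+    env_or_file="OAI_CONFIG_LIST",
+    filter_dict={"model": ["gpt-4"]},
+)
+client = OpenAIWrapper(config_list=config_list)
+```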
+
+We suggest that you take a look at this [notebook](https://github.com/microsoft/autogen/blob/main/notebook/oai_openai_utils.ipynb) for full code examples of the different methods to configure your model endpoints.

 ### Logic error

 Another type of error is that the returned response does not satisfy a requirement. For example, if the response is required to be a valid json string, one would like to filter the responses that are not. This can be achieved by providing a list of configurations and a filter function. For example,

 ```python
-def valid_json_filter(context, config, response):
-    for text in autogen.Completion.extract_text(response):
+def valid_json_filter(response, **_):
+    for text in OpenAIWrapper.extract_text_or_function_call(response):
         try:
             json.loads(text)
             return True
@@ -178,14 +213,16 @@
             pass
     return False

-response = autogen.Completion.create(
-    config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo"}, {"model": "text-davinci-003"}],
+client = OpenAIWrapper(
+    config_list=[{"model": "text-ada-001"}, {"model": "gpt-3.5-turbo-instruct"}, {"model": "text-davinci-003"}],
+)
+response = client.create(
     prompt="How to construct a json request to Bing API to search for 'latest AI news'? Return the JSON request.",
     filter_func=valid_json_filter,
 )
 ```

-The example above will try to use text-ada-001, gpt-3.5-turbo, and text-davinci-003 iteratively, until a valid json string is returned or the last config is used. One can also repeat the same model in the list for multiple times to try one model multiple times for increasing the robustness of the final response.
+The example above will try to use text-ada-001, gpt-3.5-turbo-instruct, and text-davinci-003 iteratively, until a valid json string is returned or the last config is used. One can also repeat the same model multiple times in the list (with different seeds) to increase the robustness of the final response.

 *Advanced use case: Check this [blogpost](/blog/2023/05/18/GPT-adaptive-humaneval) to find how to improve GPT-4's coding performance from 68% to 90% while reducing the inference cost.*

@@ -194,7 +231,7 @@ The example above will try to use text-ada-001, gpt-3.5-turbo, and text-davinci-

 ## Templating

 If the provided prompt or message is a template, it will be automatically materialized with a given context. For example,

 ```python
-response = autogen.Completion.create(
+response = client.create(
     context={"problem": "How many positive integers, not exceeding 100, are multiples of 2 or 3 but not 4?"},
     prompt="{problem} Solve the problem carefully.",
     allow_format_str_template=True,
@@ -228,11 +265,11 @@ context = {
     "external_info_0": "Problem 1: ...",
 }

-response = autogen.ChatCompletion.create(context, messages=messages, **config)
+response = client.create(context=context, messages=messages, **config)
 messages.append(
     {
         "role": "assistant",
-        "content": autogen.ChatCompletion.extract_text(response)[0]
+        "content": client.extract_text_or_function_call(response)[0]
     }
 )
 messages.append(
@@ -247,10 +284,10 @@ context.append(
     {
         "external_info_1": "Theorem 1: ...",
     }
 )
-response = autogen.ChatCompletion.create(context, messages=messages, **config)
+response = client.create(context=context, messages=messages, **config)
 ```

-## Logging (Experimental)
+## Logging (for openai<1)

 When debugging or diagnosing an LLM-based system, it is often convenient to log the API calls and analyze them. `autogen.Completion` and `autogen.ChatCompletion` offer an easy way to collect the API call histories. For example, to log the chat histories, simply run:
 ```python
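 # a minimal sketch of the pre-0.2 logging pattern (openai<1); the surrounding
 # calls are placeholders, bracketed by start_logging() and stop_logging()
 autogen.ChatCompletion.start_logging()
 # ... make the Completion/ChatCompletion calls to be analyzed here ...
 autogen.ChatCompletion.stop_logging()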