From 3396c466d9256d3d756d6239075a5fd8ae58a3f2 Mon Sep 17 00:00:00 2001
From: kevin666aa <yrwu000627@gmail.com>
Date: Mon, 15 Jan 2024 15:11:38 -0500
Subject: [PATCH 1/4] update

---
 autogen/agent_utils.py                 |  28 +++
 autogen/agentchat/conversable_agent.py |  23 +++
 notebook/oai_client_cost.ipynb         | 228 ++++++++++++++++++++++++-
 test/agentchat/test_agent_usage.py     | 114 +++++++++++++
 4 files changed, 386 insertions(+), 7 deletions(-)
 create mode 100644 autogen/agent_utils.py
 create mode 100644 test/agentchat/test_agent_usage.py

diff --git a/autogen/agent_utils.py b/autogen/agent_utils.py
new file mode 100644
index 00000000000..bd7a1e27a54
--- /dev/null
+++ b/autogen/agent_utils.py
@@ -0,0 +1,28 @@
+from typing import List, Dict, Tuple
+from autogen import Agent
+
+
+def gather_usage_summary(agents: List[Agent]) -> Tuple[Dict[str, any], Dict[str, any]]:
+    def aggregate_summary(usage_summary: Dict[str, any], agent_summary: Dict[str, any]) -> None:
+        if agent_summary is None:
+            return
+        usage_summary["total_cost"] += agent_summary.get("total_cost", 0)
+        for model, data in agent_summary.items():
+            if model != "total_cost":
+                if model not in usage_summary:
+                    usage_summary[model] = data.copy()  # copy so the agent's own summary is never mutated
+                else:
+                    usage_summary[model]["cost"] += data.get("cost", 0)
+                    usage_summary[model]["prompt_tokens"] += data.get("prompt_tokens", 0)
+                    usage_summary[model]["completion_tokens"] += data.get("completion_tokens", 0)
+                    usage_summary[model]["total_tokens"] += data.get("total_tokens", 0)
+
+    total_usage_summary = {"total_cost": 0}
+    actual_usage_summary = {"total_cost": 0}
+
+    for agent in agents:
+        if getattr(agent, "client", None):  # skip agents with no `client` attribute or a None client
+            aggregate_summary(total_usage_summary, agent.client.total_usage_summary)
+            aggregate_summary(actual_usage_summary, agent.client.actual_usage_summary)
+
+    return total_usage_summary, actual_usage_summary
diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py
index 0a94905ffa8..ef8a4c13d90 100644
--- a/autogen/agentchat/conversable_agent.py
+++ b/autogen/agentchat/conversable_agent.py
@@ -694,6 +694,8 @@ def reset(self):
         self.clear_history()
         self.reset_consecutive_auto_reply_counter()
         self.stop_reply_at_receive()
+        if self.client is not None:
+            self.client.clear_usage_summary()
         for reply_func_tuple in self._reply_func_list:
             if reply_func_tuple["reset_config"] is not None:
                 reply_func_tuple["reset_config"](reply_func_tuple["config"])
@@ -1887,3 +1889,24 @@ def process_last_message(self, messages):
         messages = messages.copy()
         messages[-1]["content"] = processed_user_text
         return messages
+
+    def print_usage_summary(self, mode: Union[str, List[str]] = ["actual", "total"]) -> None:
+        if self.client is None:
+            print(f"No cost incurred from agent '{self.name}'.")
+        else:
+            print(f"Agent '{self.name}':")
+            self.client.print_usage_summary(mode)
+
+    def get_actual_usage(self) -> Union[None, Dict[str, int]]:
+        """Get the actual usage summary."""
+        if self.client is None:
+            return None
+        else:
+            return self.client.actual_usage_summary
+
+    def get_total_usage(self) -> Union[None, Dict[str, int]]:
+        """Get the total usage summary."""
+        if self.client is None:
+            return None
+        else:
+            return self.client.total_usage_summary
diff --git a/notebook/oai_client_cost.ipynb b/notebook/oai_client_cost.ipynb
index 7798f315abb..67240c10289 100644
--- a/notebook/oai_client_cost.ipynb
+++ b/notebook/oai_client_cost.ipynb
@@ -24,6 +24,16 @@
     "\n",
     "Reset your session's usage data with `clear_usage_summary()` when needed.\n",
     "\n",
+    "We also support cost estimation for agents. use `Agent.print_usage_summary()` to print the cost summary for the agent.\n",
+    "You can retrieve usage summary in a dict using `Agent.get_actual_usage()` and `Agent.get_total_usage()`. Can `Agent.reset()` will also reset the usage summary.\n",
+    "\n",
+    "To gather usage data for a list of agents, we provide an utility function `autogen.agent_utils.gather_usage_summary` where you pass in a list of agents and gather the usage summary.\n",
+    "\n",
+    "## Caution when using Azure OpenAI!\n",
+    "If you are using azure OpenAI, the model returned from completion doesn't have the version information. The returned model is either 'gpt-35-turbo' or 'gpt-4'. From there, we are calculating the cost based on gpt-3.5-0613: ((0.0015, 0.002) per 1k prompt and completion tokens) and gpt-4-0613: (0.03,0.06). This means the cost is wrong if you are using the 1106 version of the models from azure OpenAI.\n",
+    "\n",
+    "This will be improved in the future. However, the token count summary is accurate. You can use the token count to calculate the cost yourself.\n",
+    "\n",
     "## Requirements\n",
     "\n",
     "AutoGen requires `Python>=3.8`:\n",
@@ -43,12 +53,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
     "import autogen\n",
     "from autogen import OpenAIWrapper\n",
+    "from autogen import AssistantAgent, UserProxyAgent\n",
+    "from autogen.agent_utils import gather_usage_summary\n",
     "\n",
     "# config_list = autogen.config_list_from_json(\n",
     "#     \"OAI_CONFIG_LIST\",\n",
@@ -60,7 +72,7 @@
     "config_list = autogen.config_list_from_json(\n",
     "    \"OAI_CONFIG_LIST\",\n",
     "    filter_dict={\n",
-    "        \"model\": [\"gpt-3.5-turbo\", \"gpt-35-turbo\"],\n",
+    "        \"model\": [\"gpt-35-turbo-0613\"],\n",
     "    },\n",
     ")"
    ]
@@ -79,7 +91,7 @@
     "        \"api_key\": \"<your OpenAI API key>\",\n",
     "    },  # OpenAI API endpoint for gpt-4\n",
     "    {\n",
-    "        \"model\": \"gpt-35-turbo-0631\",  # 0631 or newer is needed to use functions\n",
+    "        \"model\": \"gpt-35-turbo-0613\",  # 0631 or newer is needed to use functions\n",
     "        \"base_url\": \"<your Azure OpenAI API base>\", \n",
     "        \"api_type\": \"azure\", \n",
     "        \"api_version\": \"2023-08-01-preview\", # 2023-07-01-preview or newer is needed to use functions\n",
@@ -107,8 +119,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "In update_usage_summary\n",
-      "0.0001555\n"
+      "0.0003215\n"
      ]
     }
    ],
@@ -125,7 +136,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Usage Summary\n",
+    "## Usage Summary for OpenAIWrapper\n",
     "\n",
     "When creating a instance of OpenAIWrapper, cost of all completions from the same instance is recorded. You can call `print_usage_summary()` to checkout your usage summary. To clear up, use `clear_usage_summary()`.\n"
    ]
@@ -283,6 +294,209 @@
     "response = client.create(messages=messages, model=\"gpt-35-turbo-1106\", cache_seed=41)\n",
     "client.print_usage_summary()"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Usage Summary for Agents\n",
+    "\n",
+    "- `Agent.print_usage_summary()` will print the cost summary for the agent.\n",
+    "- `Agent.get_actual_usage()` and `Agent.get_total_usage()` will return the usage summary in a dict. When an agent doesn't use an LLM, they will return None.\n",
+    "- `Agent.reset()` will reset the usage summary.\n",
+    "- `autogen.agent_utils.gather_usage_summary` will gather the usage summary for a list of agents."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[33mai_user\u001b[0m (to assistant):\n",
+      "\n",
+      "$x^3=125$. What is x?\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[33massistant\u001b[0m (to ai_user):\n",
+      "\n",
+      "To find the value of x, we need to find the cube root of 125. \n",
+      "\n",
+      "The cube root of 125 is 5. \n",
+      "\n",
+      "Therefore, x = 5.\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33mai_user\u001b[0m (to assistant):\n",
+      "\n",
+      "Great job! Your answer is correct.\n",
+      "\n",
+      "Indeed, to find the value of x in the equation $x^3 = 125$, we need to find the cube root of 125. The cube root of 125 is indeed 5.\n",
+      "\n",
+      "Therefore, x = 5 is the correct solution. Well done!\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33massistant\u001b[0m (to ai_user):\n",
+      "\n",
+      "Thank you! I'm glad I could assist you. If you have any more questions, feel free to ask.\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "assistant = AssistantAgent(\n",
+    "    \"assistant\",\n",
+    "    system_message=\"You are a helpful assistant.\",\n",
+    "    llm_config={\n",
+    "        \"timeout\": 600,\n",
+    "        \"cache_seed\": None,\n",
+    "        \"config_list\": config_list,\n",
+    "    },\n",
+    ")\n",
+    "\n",
+    "ai_user_proxy = UserProxyAgent(\n",
+    "    name=\"ai_user\",\n",
+    "    human_input_mode=\"NEVER\",\n",
+    "    max_consecutive_auto_reply=1,\n",
+    "    code_execution_config=False,\n",
+    "    llm_config={\n",
+    "        \"config_list\": config_list,\n",
+    "    },\n",
+    "    # In the system message the \"user\" always refers to the other agent.\n",
+    "    system_message=\"You ask a user for help. You check the answer from the user and provide feedback.\",\n",
+    ")\n",
+    "assistant.reset()\n",
+    "\n",
+    "math_problem = \"$x^3=125$. What is x?\"\n",
+    "ai_user_proxy.initiate_chat(\n",
+    "    assistant,\n",
+    "    message=math_problem,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Agent 'ai_user':\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "Usage summary excluding cached usage: \n",
+      "Total cost: 0.00025\n",
+      "* Model 'gpt-35-turbo': cost: 0.00025, prompt_tokens: 80, completion_tokens: 63, total_tokens: 143\n",
+      "\n",
+      "All completions are non-cached: the total cost with cached completions is the same as actual cost.\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "\n",
+      "Agent 'assistant':\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "Usage summary excluding cached usage: \n",
+      "Total cost: 0.00036\n",
+      "* Model 'gpt-35-turbo': cost: 0.00036, prompt_tokens: 162, completion_tokens: 60, total_tokens: 222\n",
+      "\n",
+      "All completions are non-cached: the total cost with cached completions is the same as actual cost.\n",
+      "----------------------------------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "ai_user_proxy.print_usage_summary()\n",
+    "print()\n",
+    "assistant.print_usage_summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "No cost incurred from agent 'user'.\n"
+     ]
+    }
+   ],
+   "source": [
+    "user_proxy = UserProxyAgent(\n",
+    "    name=\"user\",\n",
+    "    human_input_mode=\"NEVER\",\n",
+    "    max_consecutive_auto_reply=2,\n",
+    "    code_execution_config=False,\n",
+    "    default_auto_reply=\"That's all. Thank you.\",\n",
+    ")\n",
+    "user_proxy.print_usage_summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Actual usage summary for assistant (excluding completion from cache): {'total_cost': 0.00036300000000000004, 'gpt-35-turbo': {'cost': 0.00036300000000000004, 'prompt_tokens': 162, 'completion_tokens': 60, 'total_tokens': 222}}\n",
+      "Total usage summary for assistant (including completion from cache): {'total_cost': 0.00036300000000000004, 'gpt-35-turbo': {'cost': 0.00036300000000000004, 'prompt_tokens': 162, 'completion_tokens': 60, 'total_tokens': 222}}\n",
+      "Actual usage summary for ai_user_proxy: {'total_cost': 0.000246, 'gpt-35-turbo': {'cost': 0.000246, 'prompt_tokens': 80, 'completion_tokens': 63, 'total_tokens': 143}}\n",
+      "Total usage summary for ai_user_proxy: {'total_cost': 0.000246, 'gpt-35-turbo': {'cost': 0.000246, 'prompt_tokens': 80, 'completion_tokens': 63, 'total_tokens': 143}}\n",
+      "Actual usage summary for user_proxy: None\n",
+      "Total usage summary for user_proxy: None\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Actual usage summary for assistant (excluding completion from cache):\", assistant.get_actual_usage())\n",
+    "print(\"Total usage summary for assistant (including completion from cache):\", assistant.get_total_usage())\n",
+    "\n",
+    "print(\"Actual usage summary for ai_user_proxy:\", ai_user_proxy.get_actual_usage())\n",
+    "print(\"Total usage summary for ai_user_proxy:\", ai_user_proxy.get_total_usage())\n",
+    "\n",
+    "print(\"Actual usage summary for user_proxy:\", user_proxy.get_actual_usage())\n",
+    "print(\"Total usage summary for user_proxy:\", user_proxy.get_total_usage())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'total_cost': 0.0006090000000000001,\n",
+       " 'gpt-35-turbo': {'cost': 0.0006090000000000001,\n",
+       "  'prompt_tokens': 242,\n",
+       "  'completion_tokens': 123,\n",
+       "  'total_tokens': 365}}"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "total_usage_summary, actual_usage_summary = gather_usage_summary([assistant, ai_user_proxy, user_proxy])\n",
+    "total_usage_summary"
+   ]
   }
  ],
  "metadata": {
@@ -301,7 +515,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.6"
+   "version": "3.9.18"
   }
  },
  "nbformat": 4,
diff --git a/test/agentchat/test_agent_usage.py b/test/agentchat/test_agent_usage.py
new file mode 100644
index 00000000000..df8ce274e21
--- /dev/null
+++ b/test/agentchat/test_agent_usage.py
@@ -0,0 +1,114 @@
+from autogen.agent_utils import gather_usage_summary
+from autogen import AssistantAgent, UserProxyAgent
+from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST
+import pytest
+from conftest import skip_openai
+
+try:
+    import openai
+except ImportError:
+    skip = True
+else:
+    skip = False or skip_openai
+
+
+def test_gathering():
+    assistant1 = AssistantAgent(
+        "assistant",
+        system_message="You are a helpful assistant.",
+        llm_config={
+            "config_list": OAI_CONFIG_LIST,
+        },
+    )
+    assistant2 = AssistantAgent(
+        "assistant",
+        system_message="You are a helpful assistant.",
+        llm_config={
+            "config_list": OAI_CONFIG_LIST,
+        },
+    )
+    assistant3 = AssistantAgent(
+        "assistant",
+        system_message="You are a helpful assistant.",
+        llm_config={
+            "config_list": OAI_CONFIG_LIST,
+        },
+    )
+
+    assistant1.client.total_usage_summary = {
+        "total_cost": 0.1,
+        "gpt-35-turbo": {"cost": 0.1, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
+    }
+    assistant2.client.total_usage_summary = {
+        "total_cost": 0.2,
+        "gpt-35-turbo": {"cost": 0.2, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
+    }
+    assistant3.client.total_usage_summary = {
+        "total_cost": 0.3,
+        "gpt-4": {"cost": 0.3, "prompt_tokens": 100, "completion_tokens": 200, "total_tokens": 300},
+    }
+
+    total_usage, _ = gather_usage_summary([assistant1, assistant2, assistant3])
+
+    assert round(total_usage["total_cost"], 8) == 0.6
+    assert round(total_usage["gpt-35-turbo"]["cost"], 8) == 0.3
+    assert round(total_usage["gpt-4"]["cost"], 8) == 0.3
+
+    # test when agent doesn't have client
+    user_proxy = UserProxyAgent(
+        name="ai_user",
+        human_input_mode="NEVER",
+        max_consecutive_auto_reply=2,
+        code_execution_config=False,
+        default_auto_reply="That's all. Thank you.",
+    )
+
+    assert gather_usage_summary([user_proxy]) == ({"total_cost": 0}, {"total_cost": 0})
+
+
+@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
+def test_agent_usage():
+    assistant = AssistantAgent(
+        "assistant",
+        system_message="You are a helpful assistant.",
+        llm_config={
+            "timeout": 600,
+            "cache_seed": None,
+            "config_list": OAI_CONFIG_LIST,
+        },
+    )
+
+    ai_user_proxy = UserProxyAgent(
+        name="ai_user",
+        human_input_mode="NEVER",
+        max_consecutive_auto_reply=1,
+        code_execution_config=False,
+        llm_config={
+            "config_list": OAI_CONFIG_LIST,
+        },
+        # In the system message the "user" always refers to the other agent.
+        system_message="You ask a user for help. You check the answer from the user and provide feedback.",
+    )
+
+    math_problem = "$x^3=125$. What is x?"
+    ai_user_proxy.initiate_chat(
+        assistant,
+        message=math_problem,
+    )
+
+    # test print
+    ai_user_proxy.print_usage_summary()
+    print()
+    assistant.print_usage_summary()
+
+    # test get
+    print("Actual usage summary (excluding completion from cache):", assistant.get_actual_usage())
+    print("Total usage summary (including completion from cache):", assistant.get_total_usage())
+
+    print("Actual usage summary (excluding completion from cache):", ai_user_proxy.get_actual_usage())
+    print("Total usage summary (including completion from cache):", ai_user_proxy.get_total_usage())
+
+
+if __name__ == "__main__":
+    test_gathering()
+    test_agent_usage()

From eb390c09c46676223663b4a3a6c804b240a976a8 Mon Sep 17 00:00:00 2001
From: kevin666aa <yrwu000627@gmail.com>
Date: Mon, 15 Jan 2024 15:31:06 -0500
Subject: [PATCH 2/4] update

---
 autogen/oai/openai_utils.py        |  2 +-
 test/agentchat/test_agent_usage.py | 25 ++++++++++++++++++++-----
 2 files changed, 21 insertions(+), 6 deletions(-)

diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py
index 66332e4f909..e900e214314 100644
--- a/autogen/oai/openai_utils.py
+++ b/autogen/oai/openai_utils.py
@@ -42,7 +42,7 @@
     "gpt-4-0613": (0.03, 0.06),
     "gpt-4-32k-0613": (0.06, 0.12),
     # 11-06
-    "gpt-3.5-turbo": (0.001, 0.002),
+    "gpt-3.5-turbo": (0.0015, 0.002),  # default is still 0613
     "gpt-3.5-turbo-1106": (0.001, 0.002),
     "gpt-35-turbo-1106": (0.001, 0.002),
     "gpt-4-1106-preview": (0.01, 0.03),
diff --git a/test/agentchat/test_agent_usage.py b/test/agentchat/test_agent_usage.py
index df8ce274e21..ce095c7901c 100644
--- a/test/agentchat/test_agent_usage.py
+++ b/test/agentchat/test_agent_usage.py
@@ -3,6 +3,7 @@
 from test_assistant_agent import KEY_LOC, OAI_CONFIG_LIST
 import pytest
 from conftest import skip_openai
+import autogen
 
 try:
     import openai
@@ -12,26 +13,34 @@
     skip = False or skip_openai
 
 
+@pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
 def test_gathering():
+    config_list = autogen.config_list_from_json(
+        OAI_CONFIG_LIST,
+        file_location=KEY_LOC,
+    )
     assistant1 = AssistantAgent(
         "assistant",
         system_message="You are a helpful assistant.",
         llm_config={
-            "config_list": OAI_CONFIG_LIST,
+            "config_list": config_list,
+            "model": "gpt-3.5-turbo-0613",
         },
     )
     assistant2 = AssistantAgent(
         "assistant",
         system_message="You are a helpful assistant.",
         llm_config={
-            "config_list": OAI_CONFIG_LIST,
+            "config_list": config_list,
+            "model": "gpt-3.5-turbo-0613",
         },
     )
     assistant3 = AssistantAgent(
         "assistant",
         system_message="You are a helpful assistant.",
         llm_config={
-            "config_list": OAI_CONFIG_LIST,
+            "config_list": config_list,
+            "model": "gpt-3.5-turbo-0613",
         },
     )
 
@@ -68,13 +77,18 @@ def test_gathering():
 
 @pytest.mark.skipif(skip, reason="openai not installed OR requested to skip")
 def test_agent_usage():
+    config_list = autogen.config_list_from_json(
+        OAI_CONFIG_LIST,
+        file_location=KEY_LOC,
+    )
     assistant = AssistantAgent(
         "assistant",
         system_message="You are a helpful assistant.",
         llm_config={
             "timeout": 600,
             "cache_seed": None,
-            "config_list": OAI_CONFIG_LIST,
+            "config_list": config_list,
+            "model": "gpt-3.5-turbo-0613",
         },
     )
 
@@ -84,7 +98,8 @@ def test_agent_usage():
         max_consecutive_auto_reply=1,
         code_execution_config=False,
         llm_config={
-            "config_list": OAI_CONFIG_LIST,
+            "config_list": config_list,
+            "model": "gpt-3.5-turbo-0613",
         },
         # In the system message the "user" always refers to the other agent.
         system_message="You ask a user for help. You check the answer from the user and provide feedback.",

From d03e3597bce1d55e4a3a16216a001e9dce62d9d1 Mon Sep 17 00:00:00 2001
From: Qingyun Wu <qingyun.wu@psu.edu>
Date: Mon, 15 Jan 2024 19:28:06 -0500
Subject: [PATCH 3/4] Update notebook/oai_client_cost.ipynb

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
---
 notebook/oai_client_cost.ipynb | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/notebook/oai_client_cost.ipynb b/notebook/oai_client_cost.ipynb
index 67240c10289..63bdeb48d7b 100644
--- a/notebook/oai_client_cost.ipynb
+++ b/notebook/oai_client_cost.ipynb
@@ -91,7 +91,7 @@
     "        \"api_key\": \"<your OpenAI API key>\",\n",
     "    },  # OpenAI API endpoint for gpt-4\n",
     "    {\n",
-    "        \"model\": \"gpt-35-turbo-0613\",  # 0631 or newer is needed to use functions\n",
+    "        \"model\": \"gpt-35-turbo-0613\",  # 0613 or newer is needed to use functions\n",
     "        \"base_url\": \"<your Azure OpenAI API base>\", \n",
     "        \"api_type\": \"azure\", \n",
     "        \"api_version\": \"2023-08-01-preview\", # 2023-07-01-preview or newer is needed to use functions\n",

From 73030f8f437a22459ac3502989e725525700460d Mon Sep 17 00:00:00 2001
From: kevin666aa <yrwu000627@gmail.com>
Date: Mon, 15 Jan 2024 20:14:19 -0500
Subject: [PATCH 4/4] update doc and test

---
 autogen/agent_utils.py                        | 23 +++++++++++++++++++
 autogen/agentchat/conversable_agent.py        |  1 +
 ...nb => agentchat_cost_token_tracking.ipynb} | 20 ++++++++--------
 test/agentchat/test_agent_usage.py            | 16 ++++++++++---
 test/test_notebook.py                         |  4 ++--
 5 files changed, 50 insertions(+), 14 deletions(-)
 rename notebook/{oai_client_cost.ipynb => agentchat_cost_token_tracking.ipynb} (96%)

diff --git a/autogen/agent_utils.py b/autogen/agent_utils.py
index bd7a1e27a54..431d03c78d0 100644
--- a/autogen/agent_utils.py
+++ b/autogen/agent_utils.py
@@ -3,6 +3,29 @@
 
 
 def gather_usage_summary(agents: List[Agent]) -> Tuple[Dict[str, any], Dict[str, any]]:
+    """Gather usage summary from all agents.
+
+    Args:
+        agents (list): List of agents.
+
+    Returns:
+        tuple: (total_usage_summary, actual_usage_summary)
+
+    Example return:
+        total_usage_summary = {
+            'total_cost': 0.0006090000000000001,
+            'gpt-35-turbo':
+                {
+                    'cost': 0.0006090000000000001,
+                    'prompt_tokens': 242,
+                    'completion_tokens': 123,
+                    'total_tokens': 365
+                }
+        }
+        `actual_usage_summary` follows the same format.
+        If none of the agents incurred any cost (not having a client), then the total_usage_summary and actual_usage_summary will be {'total_cost': 0}.
+    """
+
     def aggregate_summary(usage_summary: Dict[str, any], agent_summary: Dict[str, any]) -> None:
         if agent_summary is None:
             return
diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py
index ef8a4c13d90..0d02a3d4858 100644
--- a/autogen/agentchat/conversable_agent.py
+++ b/autogen/agentchat/conversable_agent.py
@@ -1891,6 +1891,7 @@ def process_last_message(self, messages):
         return messages
 
     def print_usage_summary(self, mode: Union[str, List[str]] = ["actual", "total"]) -> None:
+        """Print the usage summary."""
         if self.client is None:
             print(f"No cost incurred from agent '{self.name}'.")
         else:
diff --git a/notebook/oai_client_cost.ipynb b/notebook/agentchat_cost_token_tracking.ipynb
similarity index 96%
rename from notebook/oai_client_cost.ipynb
rename to notebook/agentchat_cost_token_tracking.ipynb
index 67240c10289..3bb764b410a 100644
--- a/notebook/oai_client_cost.ipynb
+++ b/notebook/agentchat_cost_token_tracking.ipynb
@@ -15,7 +15,8 @@
     "\n",
     "Licensed under the MIT License.\n",
     "\n",
-    "# Use AutoGen's OpenAIWrapper for cost estimation\n",
+    "# Usage tracking with AutoGen\n",
+    "## 1. Use AutoGen's OpenAIWrapper for cost estimation\n",
     "The `OpenAIWrapper` from `autogen` tracks token counts and costs of your API calls. Use the `create()` method to initiate requests and `print_usage_summary()` to retrieve a detailed usage report, including total cost and token usage for both cached and actual requests.\n",
     "\n",
     "- `mode=[\"actual\", \"total\"]` (default): print usage summary for non-caching completions and all completions (including cache).\n",
@@ -24,10 +25,11 @@
     "\n",
     "Reset your session's usage data with `clear_usage_summary()` when needed.\n",
     "\n",
-    "We also support cost estimation for agents. use `Agent.print_usage_summary()` to print the cost summary for the agent.\n",
-    "You can retrieve usage summary in a dict using `Agent.get_actual_usage()` and `Agent.get_total_usage()`. Can `Agent.reset()` will also reset the usage summary.\n",
+    "## 2. Track cost and token count for agents\n",
+    "We also support cost estimation for agents. Use `Agent.print_usage_summary()` to print the cost summary for the agent.\n",
+    "You can retrieve usage summary in a dict using `Agent.get_actual_usage()` and `Agent.get_total_usage()`. Note that `Agent.reset()` will also reset the usage summary.\n",
     "\n",
-    "To gather usage data for a list of agents, we provide an utility function `autogen.agent_utils.gather_usage_summary` where you pass in a list of agents and gather the usage summary.\n",
+    "To gather usage data for a list of agents, we provide a utility function `autogen.agent_utils.gather_usage_summary(agents)` where you pass in a list of agents and gather the usage summary.\n",
     "\n",
     "## Caution when using Azure OpenAI!\n",
     "If you are using azure OpenAI, the model returned from completion doesn't have the version information. The returned model is either 'gpt-35-turbo' or 'gpt-4'. From there, we are calculating the cost based on gpt-3.5-0613: ((0.0015, 0.002) per 1k prompt and completion tokens) and gpt-4-0613: (0.03,0.06). This means the cost is wrong if you are using the 1106 version of the models from azure OpenAI.\n",
@@ -53,7 +55,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 18,
+   "execution_count": 3,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -72,7 +74,7 @@
     "config_list = autogen.config_list_from_json(\n",
     "    \"OAI_CONFIG_LIST\",\n",
     "    filter_dict={\n",
-    "        \"model\": [\"gpt-35-turbo-0613\"],\n",
+    "        \"model\": [\"gpt-3.5-turbo\", \"gpt-35-turbo\"],\n",
     "    },\n",
     ")"
    ]
@@ -112,14 +114,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": 8,
    "metadata": {},
    "outputs": [
     {
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "0.0003215\n"
+      "0.0003535\n"
      ]
     }
    ],
@@ -128,7 +130,7 @@
     "messages = [\n",
     "    {\"role\": \"user\", \"content\": \"Can you give me 3 useful tips on learning Python? Keep it simple and short.\"},\n",
     "]\n",
-    "response = client.create(messages=messages, model=\"gpt-35-turbo-1106\", cache_seed=None)\n",
+    "response = client.create(messages=messages, model=\"gpt-3.5-turbo\", cache_seed=None)\n",
     "print(response.cost)"
    ]
   },
diff --git a/test/agentchat/test_agent_usage.py b/test/agentchat/test_agent_usage.py
index ce095c7901c..d5188cc561b 100644
--- a/test/agentchat/test_agent_usage.py
+++ b/test/agentchat/test_agent_usage.py
@@ -4,6 +4,8 @@
 import pytest
 from conftest import skip_openai
 import autogen
+import io
+from contextlib import redirect_stdout
 
 try:
     import openai
@@ -112,9 +114,17 @@ def test_agent_usage():
     )
 
     # test print
-    ai_user_proxy.print_usage_summary()
-    print()
-    assistant.print_usage_summary()
+    captured_output = io.StringIO()
+    with redirect_stdout(captured_output):
+        ai_user_proxy.print_usage_summary()
+    output = captured_output.getvalue()
+    assert "Usage summary excluding cached usage:" in output
+
+    captured_output = io.StringIO()
+    with redirect_stdout(captured_output):
+        assistant.print_usage_summary()
+    output = captured_output.getvalue()
+    assert "All completions are non-cached:" in output
 
     # test get
     print("Actual usage summary (excluding completion from cache):", assistant.get_actual_usage())
diff --git a/test/test_notebook.py b/test/test_notebook.py
index 6e8f80b5f8a..109410dfc0e 100644
--- a/test/test_notebook.py
+++ b/test/test_notebook.py
@@ -122,8 +122,8 @@ def test_graph_modelling_language_using_select_speaker(save=False):
     skip or not sys.version.startswith("3.10"),
     reason="do not run if openai is not installed or py!=3.10",
 )
-def test_oai_client_cost(save=False):
-    run_notebook("oai_client_cost.ipynb", save=save)
+def test_agentchat_cost_token_tracking(save=False):
+    run_notebook("agentchat_cost_token_tracking.ipynb", save=save)
 
 
 if __name__ == "__main__":