microsoft · sonichi · Jan 17, 2024 · Jan 15, 2024 · Jan 15, 2024 · Jan 15, 2024
diff --git a/autogen/agent_utils.py b/autogen/agent_utils.py
@@ -0,0 +1,49 @@
+from typing import Any, Dict, List, Optional, Tuple
+from autogen import Agent
+
+
+def gather_usage_summary(agents: List[Agent]) -> Tuple[Dict[str, Any], Dict[str, Any]]:
+    """Aggregate the LLM usage summaries of several agents.
+
+    Args:
+        agents: Agents to inspect. An agent whose `client` attribute is missing,
+            None, or otherwise falsy contributes nothing.
+
+    Returns:
+        A `(total_usage_summary, actual_usage_summary)` tuple built from each
+        client's attributes of the same names. Each dict maps "total_cost" to the
+        summed cost and each model name to that model's usage dict, with "cost",
+        "prompt_tokens", "completion_tokens" and "total_tokens" summed across
+        agents. If no agent contributes, both are `{"total_cost": 0}`.
+    """
+    usage_fields = ("cost", "prompt_tokens", "completion_tokens", "total_tokens")
+
+    def aggregate_summary(usage_summary: Dict[str, Any], agent_summary: Optional[Dict[str, Any]]) -> None:
+        """Add `agent_summary` into `usage_summary` in place; a None summary is a no-op."""
+        if agent_summary is None:
+            return
+        usage_summary["total_cost"] += agent_summary.get("total_cost", 0)
+        for model, data in agent_summary.items():
+            if model == "total_cost":
+                continue
+            if model not in usage_summary:
+                # Copy so later additions never write into the client's own dict.
+                usage_summary[model] = data.copy()
+                continue
+            merged = usage_summary[model]
+            for field in usage_fields:
+                # .get on both sides: a field absent from either entry counts as 0
+                # instead of raising KeyError.
+                merged[field] = merged.get(field, 0) + data.get(field, 0)
+
+    total_usage_summary = {"total_cost": 0}
+    actual_usage_summary = {"total_cost": 0}
+
+    for agent in agents:
+        # getattr rather than agent.client: tolerate agents that define no `client`.
+        client = getattr(agent, "client", None)
+        if client:
+            aggregate_summary(total_usage_summary, client.total_usage_summary)
+            aggregate_summary(actual_usage_summary, client.actual_usage_summary)
+
+    return total_usage_summary, actual_usage_summary
diff --git a/autogen/agentchat/conversable_agent.py b/autogen/agentchat/conversable_agent.py
@@ -694,6 +694,8 @@ def reset(self):
         self.clear_history()
         self.reset_consecutive_auto_reply_counter()
         self.stop_reply_at_receive()
+        if self.client is not None:
+            self.client.clear_usage_summary()
         for reply_func_tuple in self._reply_func_list:
             if reply_func_tuple["reset_config"] is not None:
                 reply_func_tuple["reset_config"](reply_func_tuple["config"])
@@ -1887,3 +1889,24 @@ def process_last_message(self, messages):
         messages = messages.copy()
         messages[-1]["content"] = processed_user_text
         return messages
+
+    def print_usage_summary(self, mode: Union[str, List[str]] = ["actual", "total"]) -> None:
+        """Print the client's usage summary for `mode`, or a no-cost notice when the agent has no client."""
+        has_client = self.client is not None
+        print(f"Agent '{self.name}':" if has_client else f"No cost incurred from agent '{self.name}'.")
+        if has_client:
+            self.client.print_usage_summary(mode)
+
+    def get_actual_usage(self) -> Union[None, Dict[str, int]]:
+        """Return the client's `actual_usage_summary`, or None when the agent has no client.
+
+        NOTE(review): the `Dict[str, int]` annotation looks too narrow (costs are floats) - confirm.
+        """
+        return None if self.client is None else self.client.actual_usage_summary
+
+    def get_total_usage(self) -> Union[None, Dict[str, int]]:
+        """Return the client's `total_usage_summary`, or None when the agent has no client.
+
+        NOTE(review): the `Dict[str, int]` annotation looks too narrow (costs are floats) - confirm.
+        """
+        return None if self.client is None else self.client.total_usage_summary
diff --git a/autogen/oai/openai_utils.py b/autogen/oai/openai_utils.py
@@ -42,7 +42,7 @@
     "gpt-4-0613": (0.03, 0.06),
     "gpt-4-32k-0613": (0.06, 0.12),
     # 11-06
-    "gpt-3.5-turbo": (0.001, 0.002),
+    "gpt-3.5-turbo": (0.0015, 0.002),  # default is still 0613
     "gpt-3.5-turbo-1106": (0.001, 0.002),
     "gpt-35-turbo-1106": (0.001, 0.002),
     "gpt-4-1106-preview": (0.01, 0.03),

diff --git a/notebook/oai_client_cost.ipynb b/notebook/oai_client_cost.ipynb
@@ -24,6 +24,16 @@
     "\n",
     "Reset your session's usage data with `clear_usage_summary()` when needed.\n",
     "\n",
+    "We also support cost estimation for agents. Use `Agent.print_usage_summary()` to print the cost summary for the agent.\n",
+    "You can retrieve the usage summary as a dict using `Agent.get_actual_usage()` and `Agent.get_total_usage()`. Calling `Agent.reset()` will also reset the usage summary.\n",
+    "\n",
+    "To gather usage data for a list of agents, we provide a utility function, `autogen.agent_utils.gather_usage_summary`: pass in a list of agents and it returns their combined total and actual usage summaries.\n",
+    "\n",
+    "## Caution when using Azure OpenAI!\n",
+    "If you are using Azure OpenAI, the model name returned from a completion doesn't include version information: it is either 'gpt-35-turbo' or 'gpt-4'. We therefore calculate the cost using the 0613 prices: (0.0015, 0.002) per 1k prompt and completion tokens for gpt-3.5-turbo-0613, and (0.03, 0.06) for gpt-4-0613. This means the cost is wrong if you are using the 1106 version of the models from Azure OpenAI.\n",
+    "\n",
+    "This will be improved in the future. However, the token count summary is accurate. You can use the token count to calculate the cost yourself.\n",
+    "\n",
     "## Requirements\n",
     "\n",
     "AutoGen requires `Python>=3.8`:\n",
@@ -43,12 +53,14 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 1,
+   "execution_count": 18,
    "metadata": {},
    "outputs": [],
    "source": [
     "import autogen\n",
     "from autogen import OpenAIWrapper\n",
+    "from autogen import AssistantAgent, UserProxyAgent\n",
+    "from autogen.agent_utils import gather_usage_summary\n",
     "\n",
     "# config_list = autogen.config_list_from_json(\n",
     "#     \"OAI_CONFIG_LIST\",\n",
@@ -60,7 +72,7 @@
     "config_list = autogen.config_list_from_json(\n",
     "    \"OAI_CONFIG_LIST\",\n",
     "    filter_dict={\n",
-    "        \"model\": [\"gpt-3.5-turbo\", \"gpt-35-turbo\"],\n",
+    "        \"model\": [\"gpt-35-turbo-0613\"],\n",
     "    },\n",
     ")"
    ]
@@ -79,7 +91,7 @@
     "        \"api_key\": \"<your OpenAI API key>\",\n",
     "    },  # OpenAI API endpoint for gpt-4\n",
     "    {\n",
-    "        \"model\": \"gpt-35-turbo-0631\",  # 0631 or newer is needed to use functions\n",
+    "        \"model\": \"gpt-35-turbo-0613\",  # 0613 or newer is needed to use functions\n",
     "        \"base_url\": \"<your Azure OpenAI API base>\", \n",
     "        \"api_type\": \"azure\", \n",
     "        \"api_version\": \"2023-08-01-preview\", # 2023-07-01-preview or newer is needed to use functions\n",
@@ -107,8 +119,7 @@
      "name": "stdout",
      "output_type": "stream",
      "text": [
-      "In update_usage_summary\n",
-      "0.0001555\n"
+      "0.0003215\n"
      ]
     }
    ],
@@ -125,7 +136,7 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Usage Summary\n",
+    "## Usage Summary for OpenAIWrapper\n",
     "\n",
     "When creating a instance of OpenAIWrapper, cost of all completions from the same instance is recorded. You can call `print_usage_summary()` to checkout your usage summary. To clear up, use `clear_usage_summary()`.\n"
    ]
@@ -283,6 +294,209 @@
     "response = client.create(messages=messages, model=\"gpt-35-turbo-1106\", cache_seed=41)\n",
     "client.print_usage_summary()"
    ]
+  },
+  {
+   "cell_type": "markdown",
+   "metadata": {},
+   "source": [
+    "## Usage Summary for Agents\n",
+    "\n",
+    "- `Agent.print_usage_summary()` will print the cost summary for the agent.\n",
+    "- `Agent.get_actual_usage()` and `Agent.get_total_usage()` will return the usage summary in a dict. When an agent doesn't use an LLM, they will return None.\n",
+    "- `Agent.reset()` will reset the usage summary.\n",
+    "- `autogen.agent_utils.gather_usage_summary` will gather the usage summary for a list of agents."
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 19,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[33mai_user\u001b[0m (to assistant):\n",
+      "\n",
+      "$x^3=125$. What is x?\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n"
+     ]
+    },
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "\u001b[33massistant\u001b[0m (to ai_user):\n",
+      "\n",
+      "To find the value of x, we need to find the cube root of 125. \n",
+      "\n",
+      "The cube root of 125 is 5. \n",
+      "\n",
+      "Therefore, x = 5.\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33mai_user\u001b[0m (to assistant):\n",
+      "\n",
+      "Great job! Your answer is correct.\n",
+      "\n",
+      "Indeed, to find the value of x in the equation $x^3 = 125$, we need to find the cube root of 125. The cube root of 125 is indeed 5.\n",
+      "\n",
+      "Therefore, x = 5 is the correct solution. Well done!\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n",
+      "\u001b[33massistant\u001b[0m (to ai_user):\n",
+      "\n",
+      "Thank you! I'm glad I could assist you. If you have any more questions, feel free to ask.\n",
+      "\n",
+      "--------------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "\n",
+    "assistant = AssistantAgent(\n",
+    "    \"assistant\",\n",
+    "    system_message=\"You are a helpful assistant.\",\n",
+    "    llm_config={\n",
+    "        \"timeout\": 600,\n",
+    "        \"cache_seed\": None,\n",
+    "        \"config_list\": config_list,\n",
+    "    },\n",
+    ")\n",
+    "\n",
+    "ai_user_proxy = UserProxyAgent(\n",
+    "    name=\"ai_user\",\n",
+    "    human_input_mode=\"NEVER\",\n",
+    "    max_consecutive_auto_reply=1,\n",
+    "    code_execution_config=False,\n",
+    "    llm_config={\n",
+    "        \"config_list\": config_list,\n",
+    "    },\n",
+    "    # In the system message the \"user\" always refers to the other agent.\n",
+    "    system_message=\"You ask a user for help. You check the answer from the user and provide feedback.\",\n",
+    ")\n",
+    "assistant.reset()\n",
+    "\n",
+    "math_problem = \"$x^3=125$. What is x?\"\n",
+    "ai_user_proxy.initiate_chat(\n",
+    "    assistant,\n",
+    "    message=math_problem,\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 20,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Agent 'ai_user':\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "Usage summary excluding cached usage: \n",
+      "Total cost: 0.00025\n",
+      "* Model 'gpt-35-turbo': cost: 0.00025, prompt_tokens: 80, completion_tokens: 63, total_tokens: 143\n",
+      "\n",
+      "All completions are non-cached: the total cost with cached completions is the same as actual cost.\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "\n",
+      "Agent 'assistant':\n",
+      "----------------------------------------------------------------------------------------------------\n",
+      "Usage summary excluding cached usage: \n",
+      "Total cost: 0.00036\n",
+      "* Model 'gpt-35-turbo': cost: 0.00036, prompt_tokens: 162, completion_tokens: 60, total_tokens: 222\n",
+      "\n",
+      "All completions are non-cached: the total cost with cached completions is the same as actual cost.\n",
+      "----------------------------------------------------------------------------------------------------\n"
+     ]
+    }
+   ],
+   "source": [
+    "ai_user_proxy.print_usage_summary()\n",
+    "print()\n",
+    "assistant.print_usage_summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "No cost incurred from agent 'user'.\n"
+     ]
+    }
+   ],
+   "source": [
+    "user_proxy = UserProxyAgent(\n",
+    "    name=\"user\",\n",
+    "    human_input_mode=\"NEVER\",\n",
+    "    max_consecutive_auto_reply=2,\n",
+    "    code_execution_config=False,\n",
+    "    default_auto_reply=\"That's all. Thank you.\",\n",
+    ")\n",
+    "user_proxy.print_usage_summary()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 33,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Actual usage summary for assistant (excluding completion from cache): {'total_cost': 0.00036300000000000004, 'gpt-35-turbo': {'cost': 0.00036300000000000004, 'prompt_tokens': 162, 'completion_tokens': 60, 'total_tokens': 222}}\n",
+      "Total usage summary for assistant (including completion from cache): {'total_cost': 0.00036300000000000004, 'gpt-35-turbo': {'cost': 0.00036300000000000004, 'prompt_tokens': 162, 'completion_tokens': 60, 'total_tokens': 222}}\n",
+      "Actual usage summary for ai_user_proxy: {'total_cost': 0.000246, 'gpt-35-turbo': {'cost': 0.000246, 'prompt_tokens': 80, 'completion_tokens': 63, 'total_tokens': 143}}\n",
+      "Total usage summary for ai_user_proxy: {'total_cost': 0.000246, 'gpt-35-turbo': {'cost': 0.000246, 'prompt_tokens': 80, 'completion_tokens': 63, 'total_tokens': 143}}\n",
+      "Actual usage summary for user_proxy: None\n",
+      "Total usage summary for user_proxy: None\n"
+     ]
+    }
+   ],
+   "source": [
+    "print(\"Actual usage summary for assistant (excluding completion from cache):\", assistant.get_actual_usage())\n",
+    "print(\"Total usage summary for assistant (including completion from cache):\", assistant.get_total_usage())\n",
+    "\n",
+    "print(\"Actual usage summary for ai_user_proxy:\", ai_user_proxy.get_actual_usage())\n",
+    "print(\"Total usage summary for ai_user_proxy:\", ai_user_proxy.get_total_usage())\n",
+    "\n",
+    "print(\"Actual usage summary for user_proxy:\", user_proxy.get_actual_usage())\n",
+    "print(\"Total usage summary for user_proxy:\", user_proxy.get_total_usage())"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{'total_cost': 0.0006090000000000001,\n",
+       " 'gpt-35-turbo': {'cost': 0.0006090000000000001,\n",
+       "  'prompt_tokens': 242,\n",
+       "  'completion_tokens': 123,\n",
+       "  'total_tokens': 365}}"
+      ]
+     },
+     "execution_count": 27,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "total_usage_summary, actual_usage_summary = gather_usage_summary([assistant, ai_user_proxy, user_proxy])\n",
+    "total_usage_summary"
+   ]
   }
  ],
  "metadata": {
@@ -301,7 +515,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.11.6"
+   "version": "3.9.18"
   }
  },
  "nbformat": 4,