Skip to content

Commit

Permalink
Improving logging in oai.completion to show token_count (#179)
Browse files Browse the repository at this point in the history
* update

* update doc

---------

Co-authored-by: Chi Wang <wang.chi@microsoft.com>
Co-authored-by: Qingyun Wu <qingyun.wu@psu.edu>
  • Loading branch information
3 people authored Oct 12, 2023
1 parent 6b14bd6 commit b61aeb6
Show file tree
Hide file tree
Showing 3 changed files with 62 additions and 0 deletions.
48 changes: 48 additions & 0 deletions autogen/oai/completion.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
from flaml.tune.space import is_constant
from flaml.automl.logger import logger_formatter
from .openai_utils import get_key
from collections import defaultdict

try:
import openai
Expand Down Expand Up @@ -157,6 +158,7 @@ def _book_keeping(cls, config: Dict, response):
value = {
"created_at": [],
"cost": [],
"token_count": [],
}
if "messages" in config:
messages = config["messages"]
Expand All @@ -168,6 +170,14 @@ def _book_keeping(cls, config: Dict, response):
key = get_key([config["prompt"]] + [choice.get("text") for choice in response["choices"]])
value["created_at"].append(cls._count_create)
value["cost"].append(response["cost"])
value["token_count"].append(
{
"model": response["model"],
"prompt_tokens": response["usage"]["prompt_tokens"],
"completion_tokens": response["usage"].get("completion_tokens", 0),
"total_tokens": response["usage"]["total_tokens"],
}
)
cls._history_dict[key] = value
cls._count_create += 1
return
Expand Down Expand Up @@ -1067,6 +1077,44 @@ def logged_history(cls) -> Dict:
"""Return the book keeping dictionary."""
return cls._history_dict

@classmethod
def print_usage_summary(cls) -> None:
    """Print a usage summary: total cost and per-model token counts.

    Aggregates prompt/completion/total token counts per model from the
    book-keeping history (`cls._history_dict`) and prints them together
    with the total cost. Handles both the compact and full history
    layouts (selected by `cls._history_compact`).

    Prints a notice and returns early when no history has been logged.
    """
    if cls._history_dict is None:
        print("No usage summary available.", flush=True)
        # Bail out: without this return, the code below would crash on
        # `None.values()`.
        return

    # Per-model accumulator for token counts.
    token_count_summary = defaultdict(lambda: {"prompt_tokens": 0, "completion_tokens": 0, "total_tokens": 0})

    if not cls._history_compact:
        # Full history: one entry per API call, each carrying a "response"
        # payload with its own "cost" and "usage" fields.
        source = cls._history_dict.values()
        total_cost = sum(msg_pair["response"]["cost"] for msg_pair in source)
    else:
        # Compact history: each value aggregates parallel lists of costs
        # and token-count records (see `_book_keeping`).
        total_cost = sum(sum(value_list["cost"]) for value_list in cls._history_dict.values())
        source = (
            token_data for value_list in cls._history_dict.values() for token_data in value_list["token_count"]
        )

    for entry in source:
        if not cls._history_compact:
            model = entry["response"]["model"]
            token_data = entry["response"]["usage"]
        else:
            model = entry["model"]
            token_data = entry

        token_count_summary[model]["prompt_tokens"] += token_data["prompt_tokens"]
        # Some responses (e.g. embeddings) report no completion tokens;
        # default to 0 for consistency with `_book_keeping`.
        token_count_summary[model]["completion_tokens"] += token_data.get("completion_tokens", 0)
        token_count_summary[model]["total_tokens"] += token_data["total_tokens"]

    print(f"Total cost: {total_cost}", flush=True)
    for model, counts in token_count_summary.items():
        print(
            f"Token count summary for model {model}: prompt_tokens: {counts['prompt_tokens']}, completion_tokens: {counts['completion_tokens']}, total_tokens: {counts['total_tokens']}",
            flush=True,
        )

@classmethod
def start_logging(
cls, history_dict: Optional[Dict] = None, compact: Optional[bool] = True, reset_counter: Optional[bool] = True
Expand Down
2 changes: 2 additions & 0 deletions test/agentchat/test_assistant_agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,9 +148,11 @@ def test_create_execute_script(human_input_mode="NEVER", max_consecutive_auto_re
```""",
)
print(conversations)
autogen.ChatCompletion.print_usage_summary()
autogen.ChatCompletion.start_logging(compact=False)
user.send("""Execute temp.py""", assistant)
print(autogen.ChatCompletion.logged_history)
autogen.ChatCompletion.print_usage_summary()
autogen.ChatCompletion.stop_logging()


Expand Down
12 changes: 12 additions & 0 deletions website/docs/Use-Cases/enhanced_inference.md
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,10 @@ The API calls made after this will be automatically logged. They can be retrieve
```python
autogen.ChatCompletion.logged_history
```
A function is available to print a usage summary, showing the total cost and the token count usage for each model:
```python
autogen.ChatCompletion.print_usage_summary()
```
To stop logging, use
```python
autogen.ChatCompletion.stop_logging()
Expand Down Expand Up @@ -366,5 +370,13 @@ Set `compact=False` in `start_logging()` to switch.
},
}
```

* Example output of the usage summary
```
Total cost: <cost>
Token count summary for model <model>: prompt_tokens: <count 1>, completion_tokens: <count 2>, total_tokens: <count 3>
```


It can be seen that the individual API call history contains redundant information of the conversation. For a long conversation the degree of redundancy is high.
The compact history is more efficient and the individual API call history contains more details.

0 comments on commit b61aeb6

Please sign in to comment.