diff --git a/autogen/agentchat/contrib/compressible_agent.py b/autogen/agentchat/contrib/compressible_agent.py
index e196773effc..4b4e93cb3a5 100644
--- a/autogen/agentchat/contrib/compressible_agent.py
+++ b/autogen/agentchat/contrib/compressible_agent.py
@@ -73,6 +73,7 @@ def __init__(
             system_message (str): system message for the ChatCompletion inference.
                 Please override this attribute if you want to reprogram the agent.
             llm_config (dict): llm inference configuration.
+                Note: you must set `model` in llm_config. It will be used to compute the token count.
                 Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
                 for available options.
             is_termination_msg (function): a function that takes a message in the form of a dictionary
@@ -121,6 +122,8 @@ def __init__(
             self.llm_compress_config = False
             self.compress_client = None
         else:
+            if "model" not in llm_config:
+                raise ValueError("llm_config must contain the 'model' field.")
             self.llm_compress_config = self.llm_config.copy()
             # remove functions
             if "functions" in self.llm_compress_config:
diff --git a/notebook/agentchat_compression.ipynb b/notebook/agentchat_compression.ipynb
index 3ba57a728b2..2bbfd01780f 100644
--- a/notebook/agentchat_compression.ipynb
+++ b/notebook/agentchat_compression.ipynb
@@ -92,7 +92,7 @@
    "config_list = autogen.config_list_from_json(\n",
    "    \"OAI_CONFIG_LIST\",\n",
    "    filter_dict={\n",
-    "        \"model\": [\"gpt-4\", \"gpt-4-0314\", \"gpt4\", \"gpt-4-32k\", \"gpt-4-32k-0314\", \"gpt-4-32k-v0314\"],\n",
+    "        \"model\": [\"gpt-4-1106-preview\"],\n",
    "    },\n",
    ")"
   ]
@@ -139,8 +139,10 @@
    "## Example 1\n",
    "This example is from [agentchat_MathChat.ipynb](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_MathChat.ipynb). Compression with code execution.\n",
    "\n",
+    "You must set the `model` field in `llm_config`, as it will be used to calculate the token usage.\n",
+    "\n",
    "Note: we set `trigger_count=600`, and `leave_last_n=2`. In this example, we set a low trigger_count to demonstrate the compression feature. \n",
-    "The token count after compression is still bigger than trigger count, mainly because the trigger count is low an the first and last 2 messages are not compressed. Thus, the compression is performed at each turn. In practice, you want to adjust the trigger_count to a bigger number and properly set the `leave_last_n` to avoid compression at each turn. "
+    "The token count after compression is still bigger than the trigger count, mainly because the trigger count is low and the first and last 2 messages are not compressed. Thus, compression is performed at each turn. In practice, you want to adjust the trigger_count to a larger number and properly set `leave_last_n` to avoid compression at each turn.\n"
   ]
  },
  {
@@ -548,6 +550,7 @@
    "        \"timeout\": 600,\n",
    "        \"cache_seed\": 42,\n",
    "        \"config_list\": config_list,\n",
+    "        \"model\": \"gpt-4-1106-preview\",  # you must set the model field in llm_config, as it will be used to calculate the token usage\n",
    "    },\n",
    "    compress_config={\n",
    "        \"mode\": \"COMPRESS\",\n",
@@ -785,6 +788,7 @@
   ],
   "source": [
    "llm_config = {\n",
+    "    \"model\": \"gpt-4-1106-preview\",\n",
    "    \"functions\": [\n",
    "        {\n",
    "            \"name\": \"python\",\n",
+@@ -1249,6 +1253,7 @@
    "        \"timeout\": 600,\n",
    "        \"cache_seed\": 43,\n",
    "        \"config_list\": config_list,\n",
+    "        \"model\": \"gpt-4-1106-preview\",\n",
    "    },\n",
    "    compress_config={\n",
    "        \"mode\": \"CUSTOMIZED\",\n",
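
For context, here is a minimal sketch of how a `CompressibleAgent` is constructed once this change lands, with `model` set at the top level of `llm_config`. The values mirror the notebook cells in this diff; the `trigger_count` and `leave_last_n` keys in `compress_config` are taken from the notebook prose, so treat their placement here as an assumption rather than the definitive schema.

```python
# Sketch of the new requirement: `model` must appear in llm_config so the
# agent can compute token counts. Values mirror notebook/agentchat_compression.ipynb.
import autogen
from autogen.agentchat.contrib.compressible_agent import CompressibleAgent

config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={"model": ["gpt-4-1106-preview"]},
)

assistant = CompressibleAgent(
    name="assistant",
    llm_config={
        "timeout": 600,
        "cache_seed": 42,
        "config_list": config_list,
        "model": "gpt-4-1106-preview",  # now required: used to calculate token usage
    },
    compress_config={
        "mode": "COMPRESS",
        "trigger_count": 600,  # deliberately low in the notebook to demo compression
        "leave_last_n": 2,     # the first and last 2 messages are never compressed
    },
)

# Omitting `model` now fails fast at construction time instead of erroring
# later during token counting:
#   CompressibleAgent(name="assistant", llm_config={"config_list": config_list})
#   -> ValueError: llm_config must contain the 'model' field.
```

The fail-fast `ValueError` in `__init__` means a misconfigured agent is caught before any chat starts, which is why the notebook cells are updated to set `model` explicitly alongside `config_list`.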