diff --git a/autogen/agentchat/contrib/compressible_agent.py b/autogen/agentchat/contrib/compressible_agent.py
index e196773effc..4b4e93cb3a5 100644
--- a/autogen/agentchat/contrib/compressible_agent.py
+++ b/autogen/agentchat/contrib/compressible_agent.py
@@ -73,6 +73,7 @@ def __init__(
             system_message (str): system message for the ChatCompletion inference.
                 Please override this attribute if you want to reprogram the agent.
             llm_config (dict): llm inference configuration.
+                Note: you must set `model` in llm_config. It will be used to compute the token count.
                 Please refer to [OpenAIWrapper.create](/docs/reference/oai/client#create)
                 for available options.
             is_termination_msg (function): a function that takes a message in the form of a dictionary
@@ -121,6 +122,8 @@ def __init__(
             self.llm_compress_config = False
             self.compress_client = None
         else:
+            if "model" not in llm_config:
+                raise ValueError("llm_config must contain the 'model' field.")
             self.llm_compress_config = self.llm_config.copy()
             # remove functions
             if "functions" in self.llm_compress_config:
diff --git a/notebook/agentchat_compression.ipynb b/notebook/agentchat_compression.ipynb
index 3ba57a728b2..2bbfd01780f 100644
--- a/notebook/agentchat_compression.ipynb
+++ b/notebook/agentchat_compression.ipynb
@@ -92,7 +92,7 @@
    "config_list = autogen.config_list_from_json(\n",
    "    \"OAI_CONFIG_LIST\",\n",
    "    filter_dict={\n",
-    "        \"model\": [\"gpt-4\", \"gpt-4-0314\", \"gpt4\", \"gpt-4-32k\", \"gpt-4-32k-0314\", \"gpt-4-32k-v0314\"],\n",
+    "        \"model\": [\"gpt-4-1106-preview\"],\n",
    "    },\n",
    ")"
   ]
@@ -139,8 +139,10 @@
    "## Example 1\n",
    "This example is from [agentchat_MathChat.ipynb](https://github.com/microsoft/autogen/blob/main/notebook/agentchat_MathChat.ipynb). Compression with code execution.\n",
    "\n",
+    "You must set the `model` field in `llm_config`, as it will be used to calculate the token usage.\n",
+    "\n",
    "Note: we set `trigger_count=600`, and `leave_last_n=2`. In this example, we set a low trigger_count to demonstrate the compression feature. \n",
-    "The token count after compression is still bigger than trigger count, mainly because the trigger count is low an the first and last 2 messages are not compressed. Thus, the compression is performed at each turn. In practice, you want to adjust the trigger_count to a bigger number and properly set the `leave_last_n` to avoid compression at each turn. "
+    "The token count after compression is still bigger than the trigger count, mainly because the trigger count is low and the first and last 2 messages are not compressed. Thus, compression is performed at each turn. In practice, you want to adjust the trigger_count to a larger number and properly set `leave_last_n` to avoid compression at each turn.\n"
   ]
  },
  {
@@ -548,6 +550,7 @@
    "        \"timeout\": 600,\n",
    "        \"cache_seed\": 42,\n",
    "        \"config_list\": config_list,\n",
+    "        \"model\": \"gpt-4-1106-preview\",  # you must set the model field in llm_config, as it will be used to calculate the token usage\n",
    "    },\n",
    "    compress_config={\n",
    "        \"mode\": \"COMPRESS\",\n",
@@ -785,6 +788,7 @@
   ],
   "source": [
    "llm_config = {\n",
+    "    \"model\": \"gpt-4-1106-preview\",\n",
    "    \"functions\": [\n",
    "        {\n",
    "            \"name\": \"python\",\n",
+@@ -1249,6 +1253,7 @@
    "        \"timeout\": 600,\n",
    "        \"cache_seed\": 43,\n",
    "        \"config_list\": config_list,\n",
+    "        \"model\": \"gpt-4-1106-preview\",\n",
    "    },\n",
    "    compress_config={\n",
    "        \"mode\": \"CUSTOMIZED\",\n",
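
For context, here is a minimal sketch of how a `CompressibleAgent` is constructed once this change lands, with `model` set at the top level of `llm_config`. The values mirror the notebook cells in this diff; the `trigger_count` and `leave_last_n` keys in `compress_config` are taken from the notebook prose, so treat their placement here as an assumption rather than the definitive schema.

```python
# Sketch of the new requirement: `model` must appear in llm_config so the
# agent can compute token counts. Values mirror notebook/agentchat_compression.ipynb.
import autogen
from autogen.agentchat.contrib.compressible_agent import CompressibleAgent

config_list = autogen.config_list_from_json(
    "OAI_CONFIG_LIST",
    filter_dict={"model": ["gpt-4-1106-preview"]},
)

assistant = CompressibleAgent(
    name="assistant",
    llm_config={
        "timeout": 600,
        "cache_seed": 42,
        "config_list": config_list,
        "model": "gpt-4-1106-preview",  # now required: used to calculate token usage
    },
    compress_config={
        "mode": "COMPRESS",
        "trigger_count": 600,  # deliberately low in the notebook to demo compression
        "leave_last_n": 2,     # the first and last 2 messages are never compressed
    },
)

# Omitting `model` now fails fast at construction time instead of erroring
# later during token counting:
#   CompressibleAgent(name="assistant", llm_config={"config_list": config_list})
#   -> ValueError: llm_config must contain the 'model' field.
```

The fail-fast `ValueError` in `__init__` means a misconfigured agent is caught before any chat starts, which is why the notebook cells are updated to set `model` explicitly alongside `config_list`.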