diff --git a/docs/README.md b/docs/README.md index f119bdff..d5a78c4a 100644 --- a/docs/README.md +++ b/docs/README.md @@ -20,7 +20,7 @@ The documentation is available in both English and Simplified Chinese. We use You need to install the Python package `sphinx-intl` before starting. -1. After updating the English documentation, run `make gettext`, and the pot files will be placed in the `build/gettext` directory. +1. After updating the English documentation, run `make gettext`, and the pot files will be placed in the `build/gettext` directory. `make gettext` can be slow if the doc is long. 2. Use the generated pot files to update the po files: ```bash @@ -29,4 +29,22 @@ You need to install the Python package `sphinx-intl` before starting. 3. Translate po files at `locales\zh_CN\LC_MESSAGES`. Pay attention to fuzzy matches (messages after `#, fuzzy`). Please be careful not to break reST notation. -4. Build translated document: `make -e SPHINXOPTS="-D language='zh_CN'" html` or `sphinx-build -M html source build -D language=zh_CN` \ No newline at end of file +4. Build translated document: `make -e SPHINXOPTS="-D language='zh_CN'" html` or `sphinx-build -M html source build -D language=zh_CN` + +## Auto Build + +```bash +pip install sphinx-autobuild +``` + +To autobuild the default version: +```bash +sphinx-autobuild source build/html +``` + +To autobuild the translated version: +```bash +sphinx-autobuild source build/html -D language=zh_CN --watch locales/zh_CN +``` + +By default, the doc is at `http://127.0.0.1:8000` \ No newline at end of file diff --git a/docs/source/framework/function_call.md b/docs/source/framework/function_call.md index 24fd2b24..dcdea5b7 100644 --- a/docs/source/framework/function_call.md +++ b/docs/source/framework/function_call.md @@ -86,7 +86,6 @@ To set up the example case, you can use the following code: ```python import json -import re def get_current_temperature(location: str, unit: str = "celsius"): """Get current temperature at a location. @@ -130,29 +129,6 @@ def get_function_by_name(name): if name == "get_temperature_date": return get_temperature_date -def try_parse_tool_calls(content: str): - """Try parse the tool calls.""" - tool_calls = [] - offset = 0 - for i, m in enumerate(re.finditer(r"(.+)?", content)): - if i == 0: - offset = m.start() - try: - func = json.loads(m.group(1)) - tool_calls.append({"type": "function", "function": func}) - if isinstance(func["arguments"], str): - func["arguments"] = json.loads(func["arguments"]) - except json.JSONDecodeError as _: - print(m) - pass - if tool_calls: - if offset > 0 and content[:offset].strip(): - c = content[:offset] - else: - c = "" - return {"role": "assistant", "content": c, "tool_calls": tool_calls} - return {"role": "assistant", "content": re.sub(r"<\|im_end\|>$", "", content)} - TOOLS = [ { "type": "function", @@ -204,7 +180,7 @@ TOOLS = [ }, ] MESSAGES = [ - {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-08-31"}, + {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-09-30"}, {"role": "user", "content": "What's the temperature in San Francisco now? How about tomorrow?"}, ] ``` @@ -302,11 +278,10 @@ You could append the date to user message in your application code. ```json [ - {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. 
You are a helpful assistant.\n\nCurrent Date: 2024-08-31"}, + {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-09-30"}, {"role": "user", "content": "What's the temperature in San Francisco now? How about tomorrow?"} ] ``` - ::: ### Qwen-Agent @@ -323,7 +298,7 @@ Before starting, let's make sure the latest library is installed: pip install -U qwen-agent ``` -For this guide, we are at version v0.0.9. +For this guide, we are at version v0.0.10. #### Preparing @@ -350,8 +325,8 @@ For model inputs, the common message structure for system, user, and assistant h ```python messages = MESSAGES[:] # [ -# {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-08-31"}, -# {"role": "user", "content": "What's the temperature in San Francisco now? How about tomorrow?"}, +# {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-09-30"}, +# {"role": "user", "content": "What's the temperature in San Francisco now? How about tomorrow?"} # ] ``` @@ -389,8 +364,8 @@ Since we enable `parallel_function_calls`, we should get two messages in the res ```python [ - {"role": "assistant", "content": "", "function_call": {"name": "get_current_temperature", "arguments": '{"location": "San Francisco, CA, USA"}'}}, - {"role": "assistant", "content": "", "function_call": {"name": "get_temperature_date", "arguments": '{"location": "San Francisco, CA, USA", "date": "2024-09-01"}'}}, + {'role': 'assistant', 'content': '', 'function_call': {'name': 'get_current_temperature', 'arguments': '{"location": "San Francisco, CA, USA", "unit": "celsius"}'}}, + {'role': 'assistant', 'content': '', 'function_call': {'name': 'get_temperature_date', 'arguments': '{"location": "San Francisco, CA, USA", "date": "2024-10-01", "unit": "celsius"}'}}, ] ``` @@ -403,7 +378,6 @@ Note that Qwen2.5-7B-Instruct is quite capable: - It has followed the function instructions to add the state and the country to the location. - It has correctly induced the date of tomorrow and given in the format required by the function. - Then comes the critical part -- checking and applying the function call: ```python3 for message in responses: @@ -431,12 +405,12 @@ To get tool results: Now the messages are ```python [ - {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-08-31"}, - {"role": "user", "content": "What's the temperature in San Francisco now? How about tomorrow?"}, - {"role": "assistant", "content": "", "function_call": {"name": "get_current_temperature", "arguments": '{"location": "San Francisco, CA, USA"}'}}, - {"role": "assistant", "content": "", "function_call": {"name": "get_temperature_date", "arguments": '{"location": "San Francisco, CA, USA", "date": "2024-09-01"}'}}, - {"role": "function", "name": "get_current_temperature", "content": '{"temperature": 26.1, "location": "San Francisco, CA, USA", "unit": "celsius"}'}, - {"role": "function", "name": "get_temperature_date", "content": '{"temperature": 25.9, "location": "San Francisco, CA, USA", "date": "2024-09-01", "unit": "celsius"}'}, + {'role': 'system', 'content': 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-09-30'}, + {'role': 'user', 'content': "What's the temperature in San Francisco now? 
How about tomorrow?"}, + {'role': 'assistant', 'content': '', 'function_call': {'name': 'get_current_temperature', 'arguments': '{"location": "San Francisco, CA, USA", "unit": "celsius"}'}}, + {'role': 'assistant', 'content': '', 'function_call': {'name': 'get_temperature_date', 'arguments': '{"location": "San Francisco, CA, USA", "date": "2024-10-01", "unit": "celsius"}'}}, + {'role': 'function', 'name': 'get_current_temperature', 'content': '{"temperature": 26.1, "location": "San Francisco, CA, USA", "unit": "celsius"}'}, + {'role': 'function', 'name': 'get_temperature_date', 'content': '{"temperature": 25.9, "location": "San Francisco, CA, USA", "date": "2024-10-01", "unit": "celsius"}'}, ] ``` @@ -453,7 +427,7 @@ messages.extend(responses) The final response should be like ```python -{"role": "assistant", "content": "The current temperature in San Francisco is 26.1 degrees Celsius. For tomorrow, the forecasted temperature is 25.9 degrees Celsius."} +{'role': 'assistant', 'content': 'Currently, the temperature in San Francisco is approximately 26.1°C. Tomorrow, on 2024-10-01, the temperature is forecasted to be around 25.9°C.'} ``` ### Hugging Face transformers @@ -527,10 +501,10 @@ output_text = tokenizer.batch_decode(outputs)[0][len(text):] The output texts should be like ```text -{"name": "get_current_temperature", "arguments": "{\"location\": \"San Francisco, CA, USA\", \"unit\": \"celsius\"}"} +{"name": "get_current_temperature", "arguments": {"location": "San Francisco, CA, USA"}} -{"name": "get_temperature_date", "arguments": "{\"location\": \"San Francisco, CA, USA\", \"date\": \"2024-09-01\", \"unit\": \"celsius\"}"} +{"name": "get_temperature_date", "arguments": {"location": "San Francisco, CA, USA", "date": "2024-10-01"}} <|im_end|> ``` @@ -538,8 +512,38 @@ Now we need to do two things: 1. Parse the generated tool calls to a message and add them to the messages, so that the model knows which tools are used. 2. Obtain the results of the tools and add them to the messages, so that the model knows the results of the tool calls. -In `transformers`, the tool calls should be a field of assistant messages.[^tool_call_arg_format] -Let's use a simple function called `try_parse_tool_calls` to parse the tool calls, which can be found in [the preparation code](#prepcode). +In `transformers`, the tool calls should be a field of assistant messages. +Let's use a simple function called `try_parse_tool_calls` to parse the tool calls: + +{#parse-function} +```python +import re + +def try_parse_tool_calls(content: str): + """Try parse the tool calls.""" + tool_calls = [] + offset = 0 + for i, m in enumerate(re.finditer(r"\n(.+)?\n", content)): + if i == 0: + offset = m.start() + try: + func = json.loads(m.group(1)) + tool_calls.append({"type": "function", "function": func}) + if isinstance(func["arguments"], str): + func["arguments"] = json.loads(func["arguments"]) + except json.JSONDecodeError as e: + print(f"Failed to parse tool calls: the content is {m.group(1)} and {e}") + pass + if tool_calls: + if offset > 0 and content[:offset].strip(): + c = content[:offset] + else: + c = "" + return {"role": "assistant", "content": c, "tool_calls": tool_calls} + return {"role": "assistant", "content": re.sub(r"<\|im_end\|>$", "", content)} +``` + + This function does not cover all possible scenarios and thus is prone to errors. But it should suffice for the purpose of this guide. 
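+
+As a quick sanity check, you can run the parser on the `output_text` generated above. A minimal sketch, which only prints what the parser recovers:
+
+```python
+parsed = try_parse_tool_calls(output_text)
+# the parsed message always carries the assistant role
+assert parsed["role"] == "assistant"
+# print whatever tool calls were recognized; if none were found,
+# the "tool_calls" key is absent and the loop body never runs
+for tool_call in parsed.get("tool_calls", []):
+    print(tool_call["function"]["name"], tool_call["function"]["arguments"])
+```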
@@ -556,17 +560,19 @@ The template in the `tokenizer_config.json` assumes that the generated content a ``` instead of ```json -{ - "role": "assistant", - "content": "To obtain the current temperature, I should call the functions `get_current_temperate`.", -} -{ - "role": "assistant", - "content": "", - "tool_calls": [ - {"type": "function", "function": {"name": "get_current_temperature", "arguments": {"location": "San Francisco, CA, USA", "unit": "celsius"}}} - ] -} +[ + { + "role": "assistant", + "content": "To obtain the current temperature, I should call the functions `get_current_temperate`.", + }, + { + "role": "assistant", + "content": "", + "tool_calls": [ + {"type": "function", "function": {"name": "get_current_temperature", "arguments": {"location": "San Francisco, CA, USA", "unit": "celsius"}}} + ] + } +] ``` This is implemented roughly in `try_parse_tool_calls` but keep that in mind if you are writing your own tool call parser. @@ -593,14 +599,14 @@ if tool_calls := messages[-1].get("tool_calls", None): The messages now should be like ```python [ - {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-08-31"}, - {"role": "user", "content": "What's the temperature in San Francisco now? How about tomorrow?"}, - {"role": "assistant", "content": "", "tool_calls": [ - {'type': 'function', 'function': {'name': 'get_current_temperature', 'arguments': {'location': 'San Francisco, CA, USA', 'unit': 'celsius'}}}, - {'type': 'function', 'function': {'name': 'get_temperature_date', 'arguments': {'location': 'San Francisco, CA, USA', 'date': '2024-09-01', 'unit': 'celsius'}}} + {'role': 'system', 'content': 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-09-30'}, + {'role': 'user', 'content': "What's the temperature in San Francisco now? How about tomorrow?"}, + {'role': 'assistant', 'content': '', 'tool_calls': [ + {'type': 'function', 'function': {'name': 'get_current_temperature', 'arguments': {'location': 'San Francisco, CA, USA'}}}, + {'type': 'function', 'function': {'name': 'get_temperature_date', 'arguments': {'location': 'San Francisco, CA, USA', 'date': '2024-10-01'}}}, ]}, {'role': 'tool', 'name': 'get_current_temperature', 'content': '{"temperature": 26.1, "location": "San Francisco, CA, USA", "unit": "celsius"}'}, - {'role': 'tool', 'name': 'get_temperature_date', 'content': '{"temperature": 25.9, "location": "San Francisco, CA, USA", "date": "2024-09-01", "unit": "celsius"}'}, + {'role': 'tool', 'name': 'get_temperature_date', 'content': '{"temperature": 25.9, "location": "San Francisco, CA, USA", "date": "2024-10-01", "unit": "celsius"}'}, ] ``` @@ -624,7 +630,7 @@ output_text = tokenizer.batch_decode(outputs)[0][len(text):] The output_text should be like ``` -The current temperature in San Francisco is 26.1°C. The temperature for tomorrow in San Francisco is expected to be 25.9°C.<|im_end|> +The current temperature in San Francisco is approximately 26.1°C. Tomorrow, on October 1, 2024, the temperature is expected to be around 25.9°C.<|im_end|> ``` Add the result text as an assistant message and the final messages should be ready for further interaction: @@ -632,9 +638,6 @@ Add the result text as an assistant message and the final messages should be rea messages.append(try_parse_tool_calls(output_text)) ``` -[^tool_call_arg_format]: However, note that the model generates arguments in tool calls not as a JSON object but a JSON-formatted string of the JSON object. 
- For `transformers` and `ollama`, as the interfaces require the arguments to be JSON objects or Python dicts, there will be differences between the actual model generation and the template results for tool call arguments. - ### Ollama Ollama is a set of tools for serving LLMs locally. @@ -663,13 +666,12 @@ For this guide, the `ollama` binary is at v0.3.9 and the `ollama` Python library The messages structure used in Ollama is the same with that in `transformers` and the template in [Qwen2.5 Ollama models](https://ollama.com/library/qwen2.5) has supported tool use. - The inputs are the same with those in [the preparation code](#prepcode): ```python tools = TOOLS messages = MESSAGES[:] -model_name = "qwen2:7b" +model_name = "qwen2.5:7b" ``` Note that you cannot pass Python functions as tools directly and `tools` has to be a `dict`. @@ -691,23 +693,25 @@ response = ollama.chat( The main fields in the response could be: ```python { - "model": "qwen2:7b", - "message": { - "role": "assistant", - "content": '<|tool_call_start|>{"name": "get_current_temperature", "arguments": "{\\"location\\": \\"San Francisco, CA, USA\\", \\"unit\\": \\"celsius\\"}"}<|tool_call_end|>\n<|tool_call_start|>{"name": "get_temperature_date", "arguments": "{\\"date\\": \\"2024-09-01\\", \\"location\\": \\"San Francisco, CA, USA\\", \\"unit\\": \\"celsius\\"}"}<|tool_call_end|>' + 'model': 'qwen2.5:7b', + 'message': { + 'role': 'assistant', + 'content': '', + 'tool_calls': [ + {'function': {'name': 'get_current_temperature', 'arguments': {'location': 'San Francisco, CA, USA'}}}, + {'function': {'name': 'get_temperature_date', 'arguments': {'date': '2024-10-01', 'location': 'San Francisco, CA, USA'}}}, + ], }, - "done_reason": 'stop', - "done": True, } ``` -Ollama's tool call parser has succeeded in parsing the tool results.[^tool_call_arg_format] -If not, you may refine [the `try_parse_tool_calls` function above](#prepcode). +Ollama's tool call parser has succeeded in parsing the tool results. +If not, you may refine [the `try_parse_tool_calls` function above](#parse-function). Then, we can obtain the tool results and add them to the messages. The following is basically the same with `transformers`: ```python -messages.append(try_parse_tool_calls(response["message"]["content"])) +messages.append(response["message"]) if tool_calls := messages[-1].get("tool_calls", None): for tool_call in tool_calls: @@ -727,14 +731,14 @@ if tool_calls := messages[-1].get("tool_calls", None): The messages are now like ```python [ - {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-08-31"}, - {"role": "user", "content": "What's the temperature in San Francisco now? How about tomorrow?"}, - {"role": "assistant", "content": "", "tool_calls": [ - {'function': {'name': 'get_current_temperature', 'arguments': {'location': 'San Francisco, CA, USA', "unit": "celsius"}}}, - {'function': {'name': 'get_temperature_date', 'arguments': {'date': '2024-09-01', 'location': 'San Francisco, CA, USA', "unit": "celsius"}}} + {'role': 'system', 'content': 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-09-30'}, + {'role': 'user', 'content': "What's the temperature in San Francisco now? 
How about tomorrow?"}, + {'role': 'assistant', 'content': '', 'tool_calls': [ + {'function': {'name': 'get_current_temperature', 'arguments': {'location': 'San Francisco, CA, USA'}}}, + {'function': {'name': 'get_temperature_date', 'arguments': {'date': '2024-10-01', 'location': 'San Francisco, CA, USA'}}}, ]}, {'role': 'tool', 'name': 'get_current_temperature', 'content': '{"temperature": 26.1, "location": "San Francisco, CA, USA", "unit": "celsius"}'}, - {'role': 'tool', 'name': 'get_temperature_date', 'content': '{"temperature": 25.9, "location": "San Francisco, CA, USA", "date": "2024-09-01", "unit": "celsius"}'}, + {'role': 'tool', 'name': 'get_temperature_date', 'content': '{"temperature": 25.9, "location": "San Francisco, CA, USA", "date": "2024-10-01", "unit": "celsius"}'}, ] ``` @@ -753,7 +757,7 @@ messages.append(response["message"]) The final message should be like the following: ```python -{"role": "assistant", "content": "The current temperature in San Francisco is approximately 26.1 degrees Celsius. Tomorrow, the forecasted temperature in San Francisco will be around 25.9 degrees Celsius."} +{'role': 'assistant', 'content': 'The current temperature in San Francisco is approximately 26.1°C. For tomorrow, October 1st, 2024, the forecasted temperature will be around 25.9°C.'} ``` (heading-target)= @@ -788,7 +792,7 @@ messages = MESSAGES[:] Let's also initialize the client: ```python -import openai +from openai import OpenAI openai_api_key = "EMPTY" openai_api_base = "http://localhost:8000/v1" @@ -797,6 +801,8 @@ client = OpenAI( api_key=openai_api_key, base_url=openai_api_base, ) + +model_name = "Qwen/Qwen2.5-7B-Instruct" ``` #### Tool Calls and Tool Results @@ -804,9 +810,8 @@ client = OpenAI( We can use the create chat completions endpoint to query the model: ```python - response = client.chat.completions.create( - model="Qwen/Qwen2.5-7B-Instruct", + model=model_name, messages=messages, tools=tools, temperature=0.7, @@ -824,18 +829,24 @@ Choice( finish_reason='tool_calls', index=0, logprobs=None, - message=chat.completionsMessage( + message=ChatCompletionMessage( content=None, role='assistant', function_call=None, tool_calls=[ - chat.completionsMessageToolCall( - id='call_62136354', - function=Function( - arguments='{"order_id":"order_12345"}', - name='get_delivery_date'), - type='function') - ]) + ChatCompletionMessageToolCall( + id='chatcmpl-tool-924d705adb044ff88e0ef3afdd155f15', + function=Function(arguments='{"location": "San Francisco, CA, USA"}', name='get_current_temperature'), + type='function', + ), + ChatCompletionMessageToolCall( + id='chatcmpl-tool-7e30313081944b11b6e5ebfd02e8e501', + function=Function(arguments='{"location": "San Francisco, CA, USA", "date": "2024-10-01"}', name='get_temperature_date'), + type='function', + ), + ], + ), + stop_reason=None, ) ``` @@ -847,7 +858,7 @@ For production code, we should try parsing by ourselves. Then, we can obtain the tool results and add them to the messages as shown below: ```python -messages.append(response['choices'][0]['message']) +messages.append(response.choices[0].message.model_dump()) if tool_calls := messages[-1].get("tool_calls", None): for tool_call in tool_calls: @@ -870,14 +881,14 @@ It should be noted that the OpenAI API uses `tool_call_id` to identify the relat The messages are now like ```python [ - {"role": "system", "content": "You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-08-31"}, - {"role": "user", "content": "What's the temperature in San Francisco now? 
How about tomorrow?"}, - {"role": "assistant", "tool_calls": [ - {"id": "call_xx", "type": "function", 'function': {'name': 'get_current_temperature', 'arguments': '{"location": "San Francisco, CA, USA"}'}}, - {"id": "call_xxx", "type": "function", 'function': {'name': 'get_temperature_date', 'arguments': '{"location": "San Francisco, CA, USA", "date": "2024-09-01"}'}} + {'role': 'system', 'content': 'You are Qwen, created by Alibaba Cloud. You are a helpful assistant.\n\nCurrent Date: 2024-09-30'}, + {'role': 'user', 'content': "What's the temperature in San Francisco now? How about tomorrow?"}, + {'content': None, 'role': 'assistant', 'function_call': None, 'tool_calls': [ + {'id': 'chatcmpl-tool-924d705adb044ff88e0ef3afdd155f15', 'function': {'arguments': '{"location": "San Francisco, CA, USA"}', 'name': 'get_current_temperature'}, 'type': 'function'}, + {'id': 'chatcmpl-tool-7e30313081944b11b6e5ebfd02e8e501', 'function': {'arguments': '{"location": "San Francisco, CA, USA", "date": "2024-10-01"}', 'name': 'get_temperature_date'}, 'type': 'function'}, ]}, - {'role': 'tool', 'content': '{"temperature": 26.1, "location": "San Francisco, CA, USA", "unit": "celsius"}', 'tool_call_id': 'call_xx'}, - {'role': 'tool', 'content': '{"temperature": 25.9, "location": "San Francisco, CA, USA", "date": "2024-09-01", "unit": "celsius"}', 'tool_call_id': 'call_xxx'}, + {'role': 'tool', 'content': '{"temperature": 26.1, "location": "San Francisco, CA, USA", "unit": "celsius"}', 'tool_call_id': 'chatcmpl-tool-924d705adb044ff88e0ef3afdd155f15'}, + {'role': 'tool', 'content': '{"temperature": 25.9, "location": "San Francisco, CA, USA", "date": "2024-10-01", "unit": "celsius"}', 'tool_call_id': 'chatcmpl-tool-7e30313081944b11b6e5ebfd02e8e501'}, ] ``` @@ -886,7 +897,7 @@ The messages are now like Let's call the endpoint again to seed the tool results and get response: ```python response = client.chat.completions.create( - model="Qwen/Qwen2.5-7B-Instruct", + model=model_name, messages=messages, tools=tools, temperature=0.7, @@ -896,16 +907,15 @@ response = client.chat.completions.create( "repetition_penalty": 1.05, }, ) -``` -The final response (`response.choices[0].message`) should be like +messages.append(response.choices[0].message.model_dump()) ``` -{"role": "assistant", "content": "The current temperature in San Francisco is 26.1 degrees Celsius. For tomorrow, the forecasted temperature is 25.9 degrees Celsius."} +The final response (`response.choices[0].message.content`) should be like +```text +The current temperature in San Francisco is approximately 26.1°C. For tomorrow, the forecasted temperature is around 25.9°C. ``` - - ### Discussions Now, we have introduced how to conduct inference with function calling using Qwen2 in three different frameworks! @@ -952,7 +962,6 @@ In addition, there are more on the model side of function calling, which means y While we strive to improve Qwen2.5 in this regard, edge cases are unlikely to be eliminated completely. - ## Function Calling Templates The template design for function calling often includes the following aspects: @@ -1005,7 +1014,7 @@ The model will simply continue the texts. One should write the code to actively detect which step the model is at and in particular to add the observations in the process, until the Final Answer is generated. However, as most programming interfaces accept the message structure, there should be some kind of adapter between the two. 
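+For example, a minimal step detector for that loop could look like the following sketch; it assumes the `Action:`, `Action Input:`, `Observation:`, and `Final Answer:` keywords spelled as in the template above:
+
+```python
+def parse_react_step(text: str):
+    """Classify a ReAct continuation as a final answer or a pending tool call."""
+    if "Final Answer:" in text:
+        return {"final_answer": text.split("Final Answer:")[-1].strip()}
+    if "Action:" in text and "Action Input:" in text:
+        # extract the tool name and its input from the last Action block,
+        # dropping anything the model may have hallucinated after "Observation:"
+        action = text.split("Action:")[-1].split("Action Input:")[0].strip()
+        action_input = text.split("Action Input:")[-1].split("Observation:")[0].strip()
+        return {"action": action, "action_input": action_input}
+    return {"content": text.strip()}
+```
+
+The caller would then run the named tool, append `Observation: <result>` to the prompt, and continue generation, repeating until a final answer appears.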
-[The ReAct Chat Agent](https://github.com/QwenLM/Qwen-Agent/blob/v0.0.9/qwen_agent/agents/react_chat.py) in Qwen-Agent facilitates this kind of conversion. +[The ReAct Chat Agent](https://github.com/QwenLM/Qwen-Agent/blob/v0.0.10/qwen_agent/agents/react_chat.py) in Qwen-Agent facilitates this kind of conversion. ### Qwen2 Function Calling Template @@ -1099,7 +1108,7 @@ What's the temperature in San Francisco now? How about tomorrow?<|im_end|> ``` -[Previously](#note-official-template), we have said that it is hard to adapt it for other frameworks that use less capable templating engines. +This template is hard to adapt it for other frameworks that use less capable templating engines. But it is doable at least partially for Jinja, which is Python-oriented after all. We didn't use it because using the template in `transformers` leads to more changes to the inference usage, which are not very common for beginners. @@ -1204,7 +1213,7 @@ They final text should look like the following: <|im_start|>system You are Qwen, created by Alibaba Cloud. You are a helpful assistant. -Current Date: 2024-08-31 +Current Date: 2024-09-30 # Tools @@ -1212,8 +1221,8 @@ You may call one or more functions to assist with the user query. You are provided with function signatures within XML tags: -{"name": "get_current_temperature", "description": "Get current temperature at a location.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temperature for, in the format \"City, State, Country\"."}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit to return the temperature in. Defaults to \"celsius\"."}}, "required": ["location"]}} -{"name": "get_temperature_date", "description": "Get temperature at a location and date.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temperature for, in the format \"City, State, Country\"."}, "date": {"type": "string", "description": "The date to get the temperature for, in the format \"Year-Month-Day\"."}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit to return the temperature in. Defaults to \"celsius\"."}}, "required": ["location", "date"]}} +{"type": "function", "function": {"name": "get_current_temperature", "description": "Get current temperature at a location.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temperature for, in the format \"City, State, Country\"."}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit to return the temperature in. Defaults to \"celsius\"."}}, "required": ["location"]}}} +{"type": "function", "function": {"name": "get_temperature_date", "description": "Get temperature at a location and date.", "parameters": {"type": "object", "properties": {"location": {"type": "string", "description": "The location to get the temperature for, in the format \"City, State, Country\"."}, "date": {"type": "string", "description": "The date to get the temperature for, in the format \"Year-Month-Day\"."}, "unit": {"type": "string", "enum": ["celsius", "fahrenheit"], "description": "The unit to return the temperature in. 
Defaults to \"celsius\"."}}, "required": ["location", "date"]}}} For each function call, return a json object with function name and arguments within XML tags: @@ -1224,20 +1233,20 @@ For each function call, return a json object with function name and arguments wi What's the temperature in San Francisco now? How about tomorrow?<|im_end|> <|im_start|>assistant -{"name": "get_current_temperature", "arguments": {"location": "San Francisco, CA, USA", "unit": "celsius"}} +{"name": "get_current_temperature", "arguments": {"location": "San Francisco, CA, USA"}} -{"name": "get_temperature_date", "arguments": {"location": "San Francisco, CA, USA", "date": "2024-09-01", "unit": "celsius"}} +{"name": "get_temperature_date", "arguments": {"location": "San Francisco, CA, USA", "date": "2024-10-01"}} <|im_end|> <|im_start|>user {"temperature": 26.1, "location": "San Francisco, CA, USA", "unit": "celsius"} -{"temperature": 25.9, "location": "San Francisco, CA, USA", "date": "2024-09-01", "unit": "celsius"} +{"temperature": 25.9, "location": "San Francisco, CA, USA", "date": "2024-10-01", "unit": "celsius"} <|im_end|> <|im_start|>assistant -The current temperature in San Francisco is 26.1°C. The temperature for tomorrow in San Francisco is expected to be 25.9°C.<|im_end|> +The current temperature in San Francisco is approximately 26.1°C. Tomorrow, on October 1, 2024, the temperature is expected to be around 25.9°C.<|im_end|> ``` While the text may seem different from the previous one, the basic prompting structure is still the same. diff --git a/examples/demo/cli_demo.py b/examples/demo/cli_demo.py index 65e50b4c..188c8af9 100644 --- a/examples/demo/cli_demo.py +++ b/examples/demo/cli_demo.py @@ -16,17 +16,17 @@ from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer from transformers.trainer_utils import set_seed -DEFAULT_CKPT_PATH = 'Qwen/Qwen2-7B-Instruct' +DEFAULT_CKPT_PATH = "Qwen/Qwen2.5-7B-Instruct" -_WELCOME_MSG = '''\ -Welcome to use Qwen2-Instruct model, type text to start chat, type :h to show command help. -(欢迎使用 Qwen2-Instruct 模型,输入内容即可进行对话,:h 显示命令帮助。) +_WELCOME_MSG = """\ +Welcome to use Qwen2.5-Instruct model, type text to start chat, type :h to show command help. +(欢迎使用 Qwen2.5-Instruct 模型,输入内容即可进行对话,:h 显示命令帮助。) -Note: This demo is governed by the original license of Qwen2. +Note: This demo is governed by the original license of Qwen2.5. We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content, including hate speech, violence, pornography, deception, etc. 
-(注:本演示受Qwen2的许可协议限制。我们强烈建议,用户不应传播及不应允许他人传播以下内容,包括但不限于仇恨言论、暴力、色情、欺诈相关的有害信息。) -''' -_HELP_MSG = '''\ +(注:本演示受Qwen2.5的许可协议限制。我们强烈建议,用户不应传播及不应允许他人传播以下内容,包括但不限于仇恨言论、暴力、色情、欺诈相关的有害信息。) +""" +_HELP_MSG = """\ Commands: :help / :h Show this help message 显示帮助信息 :exit / :quit / :q Exit the demo 退出Demo @@ -38,10 +38,22 @@ :conf Show current generation config 显示生成配置 :conf = Change generation config 修改生成配置 :reset-conf Reset generation config 重置生成配置 -''' +""" _ALL_COMMAND_NAMES = [ - 'help', 'h', 'exit', 'quit', 'q', 'clear', 'cl', 'clear-history', 'clh', 'history', 'his', - 'seed', 'conf', 'reset-conf', + "help", + "h", + "exit", + "quit", + "q", + "clear", + "cl", + "clear-history", + "clh", + "history", + "his", + "seed", + "conf", + "reset-conf", ] @@ -57,18 +69,21 @@ def _completer(text, state): nonlocal _matches if state == 0: - _matches = [cmd_name for cmd_name in _ALL_COMMAND_NAMES if cmd_name.startswith(text)] + _matches = [ + cmd_name for cmd_name in _ALL_COMMAND_NAMES if cmd_name.startswith(text) + ] if 0 <= state < len(_matches): return _matches[state] return None readline.set_completer(_completer) - readline.parse_and_bind('tab: complete') + readline.parse_and_bind("tab: complete") def _load_model_tokenizer(args): tokenizer = AutoTokenizer.from_pretrained( - args.checkpoint_path, resume_download=True, + args.checkpoint_path, + resume_download=True, ) if args.cpu_only: @@ -82,13 +97,14 @@ def _load_model_tokenizer(args): device_map=device_map, resume_download=True, ).eval() - model.generation_config.max_new_tokens = 2048 # For chat. + model.generation_config.max_new_tokens = 2048 # For chat. return model, tokenizer def _gc(): import gc + gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() @@ -103,46 +119,46 @@ def _clear_screen(): def _print_history(history): terminal_width = shutil.get_terminal_size()[0] - print(f'History ({len(history)})'.center(terminal_width, '=')) + print(f"History ({len(history)})".center(terminal_width, "=")) for index, (query, response) in enumerate(history): - print(f'User[{index}]: {query}') - print(f'QWen[{index}]: {response}') - print('=' * terminal_width) + print(f"User[{index}]: {query}") + print(f"Qwen[{index}]: {response}") + print("=" * terminal_width) def _get_input() -> str: while True: try: - message = input('User> ').strip() + message = input("User> ").strip() except UnicodeDecodeError: - print('[ERROR] Encoding error in input') + print("[ERROR] Encoding error in input") continue except KeyboardInterrupt: exit(1) if message: return message - print('[ERROR] Query is empty') + print("[ERROR] Query is empty") def _chat_stream(model, tokenizer, query, history): - conversation = [ - {'role': 'system', 'content': 'You are a helpful assistant.'}, - ] + conversation = [] for query_h, response_h in history: - conversation.append({'role': 'user', 'content': query_h}) - conversation.append({'role': 'assistant', 'content': response_h}) - conversation.append({'role': 'user', 'content': query}) - inputs = tokenizer.apply_chat_template( + conversation.append({"role": "user", "content": query_h}) + conversation.append({"role": "assistant", "content": response_h}) + conversation.append({"role": "user", "content": query}) + input_text = tokenizer.apply_chat_template( conversation, add_generation_prompt=True, - return_tensors='pt', + tokenize=False, ) - inputs = inputs.to(model.device) - streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=True, timeout=60.0, skip_special_tokens=True) - generation_kwargs = dict( - input_ids=inputs, - 
streamer=streamer, + inputs = tokenizer([input_text], return_tensors="pt").to(model.device) + streamer = TextIteratorStreamer( + tokenizer=tokenizer, skip_prompt=True, timeout=60.0, skip_special_tokens=True ) + generation_kwargs = { + **inputs, + "streamer": streamer, + } thread = Thread(target=model.generate, kwargs=generation_kwargs) thread.start() @@ -152,14 +168,22 @@ def _chat_stream(model, tokenizer, query, history): def main(): parser = argparse.ArgumentParser( - description='QWen2-Instruct command-line interactive chat demo.') - parser.add_argument("-c", "--checkpoint-path", type=str, default=DEFAULT_CKPT_PATH, - help="Checkpoint name or path, default to %(default)r") + description="Qwen2.5-Instruct command-line interactive chat demo." + ) + parser.add_argument( + "-c", + "--checkpoint-path", + type=str, + default=DEFAULT_CKPT_PATH, + help="Checkpoint name or path, default to %(default)r", + ) parser.add_argument("-s", "--seed", type=int, default=1234, help="Random seed") - parser.add_argument("--cpu-only", action="store_true", help="Run demo with CPU only") + parser.add_argument( + "--cpu-only", action="store_true", help="Run demo with CPU only" + ) args = parser.parse_args() - history, response = [], '' + history, response = [], "" model, tokenizer = _load_model_tokenizer(args) orig_gen_config = deepcopy(model.generation_config) @@ -175,66 +199,73 @@ def main(): query = _get_input() # Process commands. - if query.startswith(':'): + if query.startswith(":"): command_words = query[1:].strip().split() if not command_words: - command = '' + command = "" else: command = command_words[0] - if command in ['exit', 'quit', 'q']: + if command in ["exit", "quit", "q"]: break - elif command in ['clear', 'cl']: + elif command in ["clear", "cl"]: _clear_screen() print(_WELCOME_MSG) _gc() continue - elif command in ['clear-history', 'clh']: - print(f'[INFO] All {len(history)} history cleared') + elif command in ["clear-history", "clh"]: + print(f"[INFO] All {len(history)} history cleared") history.clear() _gc() continue - elif command in ['help', 'h']: + elif command in ["help", "h"]: print(_HELP_MSG) continue - elif command in ['history', 'his']: + elif command in ["history", "his"]: _print_history(history) continue - elif command in ['seed']: + elif command in ["seed"]: if len(command_words) == 1: - print(f'[INFO] Current random seed: {seed}') + print(f"[INFO] Current random seed: {seed}") continue else: new_seed_s = command_words[1] try: new_seed = int(new_seed_s) except ValueError: - print(f'[WARNING] Fail to change random seed: {new_seed_s!r} is not a valid number') + print( + f"[WARNING] Fail to change random seed: {new_seed_s!r} is not a valid number" + ) else: - print(f'[INFO] Random seed changed to {new_seed}') + print(f"[INFO] Random seed changed to {new_seed}") seed = new_seed continue - elif command in ['conf']: + elif command in ["conf"]: if len(command_words) == 1: print(model.generation_config) else: for key_value_pairs_str in command_words[1:]: - eq_idx = key_value_pairs_str.find('=') + eq_idx = key_value_pairs_str.find("=") if eq_idx == -1: - print('[WARNING] format: =') + print("[WARNING] format: =") continue - conf_key, conf_value_str = key_value_pairs_str[:eq_idx], key_value_pairs_str[eq_idx + 1:] + conf_key, conf_value_str = ( + key_value_pairs_str[:eq_idx], + key_value_pairs_str[eq_idx + 1 :], + ) try: conf_value = eval(conf_value_str) except Exception as e: print(e) continue else: - print(f'[INFO] Change config: model.generation_config.{conf_key} = {conf_value}') + print( 
+ f"[INFO] Change config: model.generation_config.{conf_key} = {conf_value}" + ) setattr(model.generation_config, conf_key, conf_value) continue - elif command in ['reset-conf']: - print('[INFO] Reset generation config') + elif command in ["reset-conf"]: + print("[INFO] Reset generation config") model.generation_config = deepcopy(orig_gen_config) print(model.generation_config) continue @@ -246,17 +277,17 @@ def main(): set_seed(seed) _clear_screen() print(f"\nUser: {query}") - print(f"\nQwen2-Instruct: ", end="") + print(f"\nQwen: ", end="") try: - partial_text = '' + partial_text = "" for new_text in _chat_stream(model, tokenizer, query, history): - print(new_text, end='', flush=True) + print(new_text, end="", flush=True) partial_text += new_text response = partial_text print() except KeyboardInterrupt: - print('[WARNING] Generation interrupted') + print("[WARNING] Generation interrupted") continue history.append((query, response)) diff --git a/examples/demo/web_demo.py b/examples/demo/web_demo.py index cd690bb1..e3971fb5 100644 --- a/examples/demo/web_demo.py +++ b/examples/demo/web_demo.py @@ -12,23 +12,40 @@ import torch from transformers import AutoModelForCausalLM, AutoTokenizer, TextIteratorStreamer -DEFAULT_CKPT_PATH = 'Qwen/Qwen2-7B-Instruct' +DEFAULT_CKPT_PATH = "Qwen/Qwen2.5-7B-Instruct" def _get_args(): - parser = ArgumentParser() - parser.add_argument("-c", "--checkpoint-path", type=str, default=DEFAULT_CKPT_PATH, - help="Checkpoint name or path, default to %(default)r") - parser.add_argument("--cpu-only", action="store_true", help="Run demo with CPU only") - - parser.add_argument("--share", action="store_true", default=False, - help="Create a publicly shareable link for the interface.") - parser.add_argument("--inbrowser", action="store_true", default=False, - help="Automatically launch the interface in a new tab on the default browser.") - parser.add_argument("--server-port", type=int, default=8000, - help="Demo server port.") - parser.add_argument("--server-name", type=str, default="127.0.0.1", - help="Demo server name.") + parser = ArgumentParser(description="Qwen2.5-Instruct web chat demo.") + parser.add_argument( + "-c", + "--checkpoint-path", + type=str, + default=DEFAULT_CKPT_PATH, + help="Checkpoint name or path, default to %(default)r", + ) + parser.add_argument( + "--cpu-only", action="store_true", help="Run demo with CPU only" + ) + + parser.add_argument( + "--share", + action="store_true", + default=False, + help="Create a publicly shareable link for the interface.", + ) + parser.add_argument( + "--inbrowser", + action="store_true", + default=False, + help="Automatically launch the interface in a new tab on the default browser.", + ) + parser.add_argument( + "--server-port", type=int, default=8000, help="Demo server port." + ) + parser.add_argument( + "--server-name", type=str, default="127.0.0.1", help="Demo server name." + ) args = parser.parse_args() return args @@ -36,7 +53,8 @@ def _get_args(): def _load_model_tokenizer(args): tokenizer = AutoTokenizer.from_pretrained( - args.checkpoint_path, resume_download=True, + args.checkpoint_path, + resume_download=True, ) if args.cpu_only: @@ -50,30 +68,30 @@ def _load_model_tokenizer(args): device_map=device_map, resume_download=True, ).eval() - model.generation_config.max_new_tokens = 2048 # For chat. + model.generation_config.max_new_tokens = 2048 # For chat. 
return model, tokenizer def _chat_stream(model, tokenizer, query, history): - conversation = [ - {'role': 'system', 'content': 'You are a helpful assistant.'}, - ] + conversation = [] for query_h, response_h in history: - conversation.append({'role': 'user', 'content': query_h}) - conversation.append({'role': 'assistant', 'content': response_h}) - conversation.append({'role': 'user', 'content': query}) - inputs = tokenizer.apply_chat_template( + conversation.append({"role": "user", "content": query_h}) + conversation.append({"role": "assistant", "content": response_h}) + conversation.append({"role": "user", "content": query}) + input_text = tokenizer.apply_chat_template( conversation, add_generation_prompt=True, - return_tensors='pt', + tokenize=False, ) - inputs = inputs.to(model.device) - streamer = TextIteratorStreamer(tokenizer=tokenizer, skip_prompt=True, timeout=60.0, skip_special_tokens=True) - generation_kwargs = dict( - input_ids=inputs, - streamer=streamer, + inputs = tokenizer([input_text], return_tensors="pt").to(model.device) + streamer = TextIteratorStreamer( + tokenizer=tokenizer, skip_prompt=True, timeout=60.0, skip_special_tokens=True ) + generation_kwargs = { + **inputs, + "streamer": streamer, + } thread = Thread(target=model.generate, kwargs=generation_kwargs) thread.start() @@ -83,13 +101,13 @@ def _chat_stream(model, tokenizer, query, history): def _gc(): import gc + gc.collect() if torch.cuda.is_available(): torch.cuda.empty_cache() def _launch_demo(args, model, tokenizer): - def predict(_query, _chatbot, _task_history): print(f"User: {_query}") _chatbot.append((_query, "")) @@ -104,7 +122,7 @@ def predict(_query, _chatbot, _task_history): print(f"History: {_task_history}") _task_history.append((_query, full_response)) - print(f"Qwen2-Instruct: {full_response}") + print(f"Qwen: {full_response}") def regenerate(_chatbot, _task_history): if not _task_history: @@ -125,22 +143,24 @@ def reset_state(_chatbot, _task_history): with gr.Blocks() as demo: gr.Markdown("""\ -
""") - gr.Markdown("""

Qwen2 Chat Bot
""") +

""") gr.Markdown( """\ -

This WebUI is based on Qwen2-Instruct, developed by Alibaba Cloud. \ -(本WebUI基于Qwen2-Instruct打造,实现聊天机器人功能。)
""") +
This WebUI is based on Qwen2.5-Instruct, developed by Alibaba Cloud. \ +(本WebUI基于Qwen2.5-Instruct打造,实现聊天机器人功能。)
""" + ) gr.Markdown("""\
-Qwen2-7B-Instruct 🤖 | -🤗  | -Qwen2-72B-Instruct 🤖 | -🤗  | - Github
""") - - chatbot = gr.Chatbot(label='Qwen2-Instruct', elem_classes="control-height") - query = gr.Textbox(lines=2, label='Input') +Qwen2.5-7B-Instruct 🤖 | +🤗  | +Qwen2.5-32B-Instruct 🤖 | +🤗  | +Qwen2.5-72B-Instruct 🤖 | +🤗  | + Github""") + + chatbot = gr.Chatbot(label="Qwen", elem_classes="control-height") + query = gr.Textbox(lines=2, label="Input") task_history = gr.State([]) with gr.Row(): @@ -148,16 +168,22 @@ def reset_state(_chatbot, _task_history): submit_btn = gr.Button("🚀 Submit (发送)") regen_btn = gr.Button("🤔️ Regenerate (重试)") - submit_btn.click(predict, [query, chatbot, task_history], [chatbot], show_progress=True) + submit_btn.click( + predict, [query, chatbot, task_history], [chatbot], show_progress=True + ) submit_btn.click(reset_user_input, [], [query]) - empty_btn.click(reset_state, [chatbot, task_history], outputs=[chatbot], show_progress=True) - regen_btn.click(regenerate, [chatbot, task_history], [chatbot], show_progress=True) + empty_btn.click( + reset_state, [chatbot, task_history], outputs=[chatbot], show_progress=True + ) + regen_btn.click( + regenerate, [chatbot, task_history], [chatbot], show_progress=True + ) gr.Markdown("""\ -Note: This demo is governed by the original license of Qwen2. \ +Note: This demo is governed by the original license of Qwen2.5. \ We strongly advise users not to knowingly generate or allow others to knowingly generate harmful content, \ including hate speech, violence, pornography, deception, etc. \ -(注:本演示受Qwen2的许可协议限制。我们强烈建议,用户不应传播及不应允许他人传播以下内容,\ +(注:本演示受Qwen2.5的许可协议限制。我们强烈建议,用户不应传播及不应允许他人传播以下内容,\ 包括但不限于仇恨言论、暴力、色情、欺诈相关的有害信息。)""") demo.queue().launch( @@ -176,5 +202,5 @@ def main(): _launch_demo(args, model, tokenizer) -if __name__ == '__main__': +if __name__ == "__main__": main() diff --git a/examples/gcu-support/README.md b/examples/gcu-support/README.md index 514c9635..b0ab878c 100644 --- a/examples/gcu-support/README.md +++ b/examples/gcu-support/README.md @@ -1,4 +1,4 @@ -# Qwen2 推理 +# Qwen2.5 推理 ## 1、配置运行环境 @@ -49,7 +49,7 @@ pip3.8 install accelerate ``` python3.8 gcu_demo.py ``` -执行 gcu_demo.py 推理示例,代码改编自 [仓库 README](https://github.com/QwenLM/Qwen2/blob/main/README.md) 中的给的 Huggingface quick start 用例。 +执行 gcu_demo.py 推理示例,代码改编自 [仓库 README](https://github.com/QwenLM/Qwen2.5/blob/main/README.md) 中的给的 Huggingface quick start 用例。 **GCU PyTorch 原生推理支持** @@ -73,5 +73,5 @@ GCU 支持 pytorch 原生推理,在 pytorch 代码上只需做少许改动就 GCU 也支持 *vLLM* 原生推理,需要安装 GCU 版本的 *vLLM* 后,将设备名改为 gcu ``` -python -m vllm.entrypoints.openai.api_server --served-model-name Qwen2-7B-Instruct --model Qwen/Qwen2-7B-Instruct --device gcu +python -m vllm.entrypoints.openai.api_server --served-model-name Qwen2.5-7B-Instruct --model Qwen/Qwen2.5-7B-Instruct --device gcu ``` diff --git a/examples/gcu-support/gcu_demo.py b/examples/gcu-support/gcu_demo.py index 94e78b94..ad09c076 100644 --- a/examples/gcu-support/gcu_demo.py +++ b/examples/gcu-support/gcu_demo.py @@ -6,7 +6,7 @@ from transformers import AutoModelForCausalLM, AutoTokenizer -model_name = "Qwen/Qwen2-7B-Instruct" +model_name = "Qwen/Qwen2.5-7B-Instruct" device = "gcu" # the device to load the model onto model = AutoModelForCausalLM.from_pretrained(