From d81c6672ca34c81e30dad0ada85f5a410e504f88 Mon Sep 17 00:00:00 2001
From: AllentDan
Date: Fri, 20 Sep 2024 13:33:56 +0800
Subject: [PATCH 1/5] Refine proxy server usage

---
 docs/en/llm/proxy_server.md         | 61 ++++++++++++++++++++++++++++
 docs/zh_cn/llm/proxy_server.md      | 62 ++++++++++++++++++++++++++++-
 lmdeploy/cli/serve.py               |  7 +++-
 lmdeploy/serve/openai/api_server.py | 34 ++++++++++++++++
 lmdeploy/serve/proxy/proxy.py       | 36 +++++++++++++++++
 5 files changed, 198 insertions(+), 2 deletions(-)

diff --git a/docs/en/llm/proxy_server.md b/docs/en/llm/proxy_server.md
index 1169794d4..0cb96f7a1 100644
--- a/docs/en/llm/proxy_server.md
+++ b/docs/en/llm/proxy_server.md
@@ -11,6 +11,8 @@ python3 -m lmdeploy.serve.proxy.proxy --server_name {server_name} --server_port
 ```

After startup is successful, the URL of the proxy service will also be printed by the script. Access this URL in your browser to open the Swagger UI.
+Subsequently, when starting the `api_server` service, users can register it with the proxy service directly through the `--proxy-url` option. For example:
+`lmdeploy serve api_server InternLM/internlm2-chat-1_8b --proxy-url http://0.0.0.0:10086`.

## API

@@ -30,6 +32,65 @@ APIs related to usage include:

The usage of these APIs is the same as that of api_server.

+### add, delete and query through commands
+
```shell
curl -X 'GET' \
  'http://localhost:10086/nodes/status' \
  -H 'accept: application/json'
```

```shell
curl -X 'POST' \
  'http://localhost:10086/nodes/add' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
  "url": "http://0.0.0.0:23333"
}'
```

```shell
curl -X 'POST' \
  'http://localhost:10086/nodes/remove?node_url=http://0.0.0.0:23333' \
  -H 'accept: application/json' \
  -d ''
```

+### add, delete and query through python
+
```python
# query all nodes
import requests
url = 'http://localhost:10086/nodes/status'
headers = {'accept': 'application/json'}
response = requests.get(url, headers=headers)
print(response.text)
```

```python
# add a new node
import requests
url = 'http://localhost:10086/nodes/add'
headers = {
    'accept': 'application/json',
    'Content-Type': 'application/json'
}
data = {"url": "http://0.0.0.0:23333"}
response = requests.post(url, headers=headers, json=data)
print(response.text)
```

```python
# delete a node
import requests
url = 'http://localhost:10086/nodes/remove'
headers = {'accept': 'application/json',}
params = {'node_url': 'http://0.0.0.0:23333',}
response = requests.post(url, headers=headers, data='', params=params)
print(response.text)
```

## Dispatch Strategy

The current distribution strategies of the proxy service are as follows:
diff --git a/docs/zh_cn/llm/proxy_server.md b/docs/zh_cn/llm/proxy_server.md
index 79d8e45f6..e14c7d766 100644
--- a/docs/zh_cn/llm/proxy_server.md
+++ b/docs/zh_cn/llm/proxy_server.md
@@ -11,6 +11,7 @@ python3 -m lmdeploy.serve.proxy.proxy --server_name {server_name} --server_port
 ```

启动成功后,代理服务的 URL 也会被脚本打印。浏览器访问这个 URL,可以打开 Swagger UI。
+随后,用户可以在启动 api_server 服务的时候,通过 `--proxy-url` 参数将其直接添加到代理服务中。例如:`lmdeploy serve api_server InternLM/internlm2-chat-1_8b --proxy-url http://0.0.0.0:10086`。

## API

@@ -20,7 +21,7 @@ python3 -m lmdeploy.serve.proxy.proxy --server_name {server_name} --server_port
- /nodes/add
- /nodes/remove

-他们分别表示,查看所有的 api_server 服务节点,增加某个节点,删除某个节点。
+他们分别表示,查看所有的 api_server 服务节点,增加某个节点,删除某个节点。最直接的使用方式是在浏览器里直接操作,也可以通过命令行或者 python 脚本操作。

和使用相关的 api 有:

@@ -30,6 +31,65 @@ python3 -m lmdeploy.serve.proxy.proxy --server_name {server_name} --server_port
这些 API 的使用方式和 api_server 一样。

+### 通过 command 增删查
+
```shell
curl -X 'GET' \
  'http://localhost:10086/nodes/status' \
  -H 'accept: application/json'
```

```shell
curl -X 'POST' \
  'http://localhost:10086/nodes/add' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
  "url": "http://0.0.0.0:23333"
}'
```

```shell
curl -X 'POST' \
  'http://localhost:10086/nodes/remove?node_url=http://0.0.0.0:23333' \
  -H 'accept: application/json' \
  -d ''
```

+### 通过 python 脚本增删查
+
```python
# 查询所有节点
import requests
url = 'http://localhost:10086/nodes/status'
headers = {'accept': 'application/json'}
response = requests.get(url, headers=headers)
print(response.text)
```

```python
# 添加新节点
import requests
url = 'http://localhost:10086/nodes/add'
headers = {
    'accept': 'application/json',
    'Content-Type': 'application/json'
}
data = {"url": "http://0.0.0.0:23333"}
response = requests.post(url, headers=headers, json=data)
print(response.text)
```

```python
# 删除某个节点
import requests
url = 'http://localhost:10086/nodes/remove'
headers = {'accept': 'application/json',}
params = {'node_url': 'http://0.0.0.0:23333',}
response = requests.post(url, headers=headers, data='', params=params)
print(response.text)
```

## 分发策略

代理服务目前的分发策略如下:
diff --git a/lmdeploy/cli/serve.py b/lmdeploy/cli/serve.py
index 7dca6403b..213856e9f 100644
--- a/lmdeploy/cli/serve.py
+++ b/lmdeploy/cli/serve.py
@@ -130,6 +130,10 @@ def add_parser_api_server():
                         type=str,
                         default=['*'],
                         help='A list of allowed http headers for cors')
+    parser.add_argument('--proxy-url',
+                        type=str,
+                        default=None,
+                        help='The proxy URL for the api server.')
     # common args
     ArgumentHelper.backend(parser)
     ArgumentHelper.log_level(parser)
@@ -298,7 +302,8 @@ def api_server(args):
             allow_headers=args.allow_headers,
             log_level=args.log_level.upper(),
             api_keys=args.api_keys,
-            ssl=args.ssl)
+            ssl=args.ssl,
+            proxy_url=args.proxy_url)

 @staticmethod
 def api_client(args):
diff --git a/lmdeploy/serve/openai/api_server.py b/lmdeploy/serve/openai/api_server.py
index 34a134973..ddcb7ba37 100644
--- a/lmdeploy/serve/openai/api_server.py
+++ b/lmdeploy/serve/openai/api_server.py
@@ -40,6 +40,9 @@ class VariableInterface:
     session_id: int = 0
     api_keys: Optional[List[str]] = None
     request_hosts = []
+    # the following are for registering to the proxy server
+    proxy_url: Optional[str] = None
+    api_server_url: Optional[str] = None

 app = FastAPI(docs_url='/')

@@ -926,6 +929,33 @@ async def stream_results() -> AsyncGenerator[bytes, None]:
         return JSONResponse(ret)

+@app.on_event('startup')
+async def startup_event():
+    if VariableInterface.proxy_url is None:
+        return
+    try:
+        import requests
+        url = f'{VariableInterface.proxy_url}/nodes/add'
+        data = {
+            'url': VariableInterface.api_server_url,
+            'status': {
+                'models': get_model_list()
+            }
+        }
+        headers = {
+            'accept': 'application/json',
+            'Content-Type': 'application/json'
+        }
+        response = requests.post(url, headers=headers, json=data)
+
+        if response.status_code != 200:
+            raise HTTPException(status_code=400,
+                                detail='Service registration failed')
+        print(response.text)
+    except Exception as e:
+        print(f'Service registration failed: {e}')
+
 def serve(model_path: str,
           model_name: Optional[str] = None,
           backend: Literal['turbomind', 'pytorch'] = 'turbomind',
@@ -941,6 +971,7 @@ def serve(model_path: str,
           log_level: str = 'ERROR',
           api_keys: Optional[Union[List[str], str]] = None,
           ssl: bool = False,
+          proxy_url: Optional[str] = None,
          **kwargs):
     """An example to perform model inference through the command line interface.
@@ -1012,6 +1043,9 @@ def serve(model_path: str,
                     chat_template_config=chat_template_config,
                     **kwargs)

+    if proxy_url is not None:
+        VariableInterface.proxy_url = proxy_url
+        VariableInterface.api_server_url = f'{http_or_https}://{server_name}:{server_port}'  # noqa
     for i in range(3):
         print(
             f'HINT:    Please open \033[93m\033[1m{http_or_https}://'
diff --git a/lmdeploy/serve/proxy/proxy.py b/lmdeploy/serve/proxy/proxy.py
index 15d182a3d..7cee968ea 100644
--- a/lmdeploy/serve/proxy/proxy.py
+++ b/lmdeploy/serve/proxy/proxy.py
@@ -4,6 +4,7 @@
 import os
 import os.path as osp
 import random
+import threading
 import time
 from collections import deque
 from http import HTTPStatus
@@ -46,6 +47,17 @@ class Node(BaseModel):
     status: Optional[Status] = None

+CONTROLLER_HEART_BEAT_EXPIRATION = int(
+    os.getenv('LMDEPLOY_CONTROLLER_HEART_BEAT_EXPIRATION', 90))
+
+
+def heart_beat_controller(proxy_controller):
+    while True:
+        time.sleep(CONTROLLER_HEART_BEAT_EXPIRATION)
+        logger.info('Start heart beat check')
+        proxy_controller.remove_stale_nodes_by_expiration()
+
+
 class NodeManager:
     """Manage all the sub nodes.

@@ -77,6 +89,10 @@ def __init__(self,
             for url, status in self.nodes.items():
                 status = Status(**status)
                 self.nodes[url] = status
+        self.heart_beat_thread = threading.Thread(target=heart_beat_controller,
+                                                  args=(self, ),
+                                                  daemon=True)
+        self.heart_beat_thread.start()

     def update_config_file(self):
         """Update the config file."""
@@ -100,6 +116,10 @@ def add(self, node_url: str, status: Optional[Status] = None):
         """
         if status is None:
             status = self.nodes.get(node_url, Status())
+        if status.models is not None:  # force register directly
+            self.nodes[node_url] = status
+            self.update_config_file()
+            return
         try:
             from lmdeploy.serve.openai.api_client import APIClient
             client = APIClient(api_server_url=node_url)
@@ -115,6 +135,22 @@ def remove(self, node_url: str):
         self.nodes.pop(node_url)
         self.update_config_file()

+    def remove_stale_nodes_by_expiration(self):
+        """Remove stale nodes."""
+        to_be_deleted = []
+        for node_url in self.nodes.keys():
+            url = f'{node_url}/health'
+            headers = {'accept': 'application/json'}
+            try:
+                response = requests.get(url, headers=headers, timeout=5)
+                if response.status_code != 200:
+                    to_be_deleted.append(node_url)
+            except:  # noqa
+                to_be_deleted.append(node_url)
+        for node_url in to_be_deleted:
+            self.remove(node_url)
+            logger.info(f'Removed node_url: {node_url}')
+
     @property
     def model_list(self):
         """Supported model list."""

From b384ae889a4378c7795fa71b6105fb4ba2a155a4 Mon Sep 17 00:00:00 2001
From: AllentDan
Date: Fri, 20 Sep 2024 13:44:00 +0800
Subject: [PATCH 2/5] fix

---
 lmdeploy/serve/proxy/proxy.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/lmdeploy/serve/proxy/proxy.py b/lmdeploy/serve/proxy/proxy.py
index 7cee968ea..fbf3e8471 100644
--- a/lmdeploy/serve/proxy/proxy.py
+++ b/lmdeploy/serve/proxy/proxy.py
@@ -116,7 +116,7 @@ def add(self, node_url: str, status: Optional[Status] = None):
         """
         if status is None:
             status = self.nodes.get(node_url, Status())
-        if status.models is not None:  # force register directly
+        if status.models != []:  # force register directly
             self.nodes[node_url] = status
             self.update_config_file()
             return

From 07830fe8a18cd0044a8dd496d04cc45b3af3e028 Mon Sep 17 00:00:00 2001
From: AllentDan
Date: Mon, 23 Sep 2024 10:30:31 +0800
Subject: [PATCH 3/5] add cli and refine documents

---
 docs/en/llm/proxy_server.md    | 20 +++++++++---------
 docs/zh_cn/llm/proxy_server.md | 16 +++++++-------
 lmdeploy/cli/serve.py          | 38 ++++++++++++++++++++++++++++
 lmdeploy/serve/proxy/proxy.py  |  4 ++--
 4 files changed, 58 insertions(+), 20 deletions(-)

diff --git a/docs/en/llm/proxy_server.md b/docs/en/llm/proxy_server.md
index 0cb96f7a1..be7857266 100644
--- a/docs/en/llm/proxy_server.md
+++ b/docs/en/llm/proxy_server.md
@@ -7,12 +7,12 @@ The request distributor service can parallelize multiple api_server services. Us
 Start the proxy service:

```shell
-python3 -m lmdeploy.serve.proxy.proxy --server_name {server_name} --server_port {server_port} --strategy "min_expected_latency"
+lmdeploy serve proxy --server-name {server_name} --server-port {server_port} --strategy "min_expected_latency"
```

After startup is successful, the URL of the proxy service will also be printed by the script. Access this URL in your browser to open the Swagger UI.
Subsequently, when starting the `api_server` service, users can register it with the proxy service directly through the `--proxy-url` option. For example:
-`lmdeploy serve api_server InternLM/internlm2-chat-1_8b --proxy-url http://0.0.0.0:10086`.
+`lmdeploy serve api_server InternLM/internlm2-chat-1_8b --proxy-url http://0.0.0.0:8000`.

## API

@@ -32,17 +32,17 @@ APIs related to usage include:

The usage of these APIs is the same as that of api_server.

-### add, delete and query through commands
+### Node Management through curl

```shell
curl -X 'GET' \
-  'http://localhost:10086/nodes/status' \
+  'http://localhost:8000/nodes/status' \
  -H 'accept: application/json'
```

```shell
curl -X 'POST' \
-  'http://localhost:10086/nodes/add' \
+  'http://localhost:8000/nodes/add' \
  -H 'accept: application/json' \
  -H 'Content-Type: application/json' \
  -d '{
  "url": "http://0.0.0.0:23333"
}'
```

```shell
curl -X 'POST' \
-  'http://localhost:10086/nodes/remove?node_url=http://0.0.0.0:23333' \
+  'http://localhost:8000/nodes/remove?node_url=http://0.0.0.0:23333' \
  -H 'accept: application/json' \
  -d ''
```

-### add, delete and query through python
+### Node Management through Python

```python
# query all nodes
import requests
-url = 'http://localhost:10086/nodes/status'
+url = 'http://localhost:8000/nodes/status'
headers = {'accept': 'application/json'}
response = requests.get(url, headers=headers)
print(response.text)
```

```python
# add a new node
import requests
-url = 'http://localhost:10086/nodes/add'
+url = 'http://localhost:8000/nodes/add'
headers = {
    'accept': 'application/json',
    'Content-Type': 'application/json'
}
data = {"url": "http://0.0.0.0:23333"}
response = requests.post(url, headers=headers, json=data)
print(response.text)
```

```python
# delete a node
import requests
-url = 'http://localhost:10086/nodes/remove'
+url = 'http://localhost:8000/nodes/remove'
headers = {'accept': 'application/json',}
params = {'node_url': 'http://0.0.0.0:23333',}
response = requests.post(url, headers=headers, data='', params=params)
print(response.text)
```
diff --git a/docs/zh_cn/llm/proxy_server.md b/docs/zh_cn/llm/proxy_server.md
index e14c7d766..103867efa 100644
--- a/docs/zh_cn/llm/proxy_server.md
+++ b/docs/zh_cn/llm/proxy_server.md
@@ -7,11 +7,11 @@
 启动代理服务:

```shell
-python3 -m lmdeploy.serve.proxy.proxy --server_name {server_name} --server_port {server_port} --strategy "min_expected_latency"
+lmdeploy serve proxy --server-name {server_name} --server-port {server_port} --strategy "min_expected_latency"
```

启动成功后,代理服务的 URL 也会被脚本打印。浏览器访问这个 URL,可以打开 Swagger UI。
随后,用户可以在启动 api_server 服务的时候,通过 `--proxy-url` 参数将其直接添加到代理服务中。例如:`lmdeploy serve api_server InternLM/internlm2-chat-1_8b --proxy-url 
http://0.0.0.0:10086`。 +随后,用户可以在启动 api_server 服务的时候,通过 `--proxy-url` 命令将其直接添加到代理服务中。例如:`lmdeploy serve api_server InternLM/internlm2-chat-1_8b --proxy-url http://0.0.0.0:8000`。 ## API @@ -35,13 +35,13 @@ python3 -m lmdeploy.serve.proxy.proxy --server_name {server_name} --server_port ```shell curl -X 'GET' \ - 'http://localhost:10086/nodes/status' \ + 'http://localhost:8000/nodes/status' \ -H 'accept: application/json' ``` ```shell curl -X 'POST' \ - 'http://localhost:10086/nodes/add' \ + 'http://localhost:8000/nodes/add' \ -H 'accept: application/json' \ -H 'Content-Type: application/json' \ -d '{ @@ -51,7 +51,7 @@ curl -X 'POST' \ ```shell curl -X 'POST' \ - 'http://localhost:10086/nodes/remove?node_url=http://0.0.0.0:23333' \ + 'http://localhost:8000/nodes/remove?node_url=http://0.0.0.0:23333' \ -H 'accept: application/json' \ -d '' ``` @@ -61,7 +61,7 @@ curl -X 'POST' \ ```python # 查询所有节点 import requests -url = 'http://localhost:10086/nodes/status' +url = 'http://localhost:8000/nodes/status' headers = {'accept': 'application/json'} response = requests.get(url, headers=headers) print(response.text) @@ -70,7 +70,7 @@ print(response.text) ```python # 添加新节点 import requests -url = 'http://localhost:10086/nodes/add' +url = 'http://localhost:8000/nodes/add' headers = { 'accept': 'application/json', 'Content-Type': 'application/json' @@ -83,7 +83,7 @@ print(response.text) ```python # 删除某个节点 import requests -url = 'http://localhost:10086/nodes/remove' +url = 'http://localhost:8000/nodes/remove' headers = {'accept': 'application/json',} params = {'node_url': 'http://0.0.0.0:23333',} response = requests.post(url, headers=headers, data='', params=params) diff --git a/lmdeploy/cli/serve.py b/lmdeploy/cli/serve.py index e579da195..0e259fb9c 100644 --- a/lmdeploy/cli/serve.py +++ b/lmdeploy/cli/serve.py @@ -208,6 +208,36 @@ def add_parser_api_client(): 'api key will be used') ArgumentHelper.session_id(parser) + @staticmethod + def add_parser_proxy(): + """Add parser for proxy server command.""" + parser = SubCliServe.subparsers.add_parser( + 'proxy', + formatter_class=DefaultsAndTypesHelpFormatter, + description=SubCliServe.proxy.__doc__, + help=SubCliServe.proxy.__doc__) + parser.set_defaults(run=SubCliServe.proxy) + parser.add_argument('--server-name', + type=str, + default='0.0.0.0', + help='Host ip for proxy serving') + parser.add_argument('--server-port', + type=int, + default=8000, + help='Server port of the proxy') + parser.add_argument( + '--strategy', + type=str, + choices=['random', 'min_expected_latency', 'min_observed_latency'], + default='min_expected_latency', + help='the strategy to dispatch requests to nodes') + parser.add_argument('--api-key', + type=str, + default=None, + help='api key. 
Default to None, which means no '
                            'api key will be used')
        ArgumentHelper.ssl(parser)

 @staticmethod
 def gradio(args):
     """Serve LLMs with web UI using gradio."""
@@ -325,8 +355,16 @@ def api_client(args):
         kwargs = convert_args(args)
         run_api_client(**kwargs)

+    @staticmethod
+    def proxy(args):
+        """Proxy server that manages distributed api_server nodes."""
+        from lmdeploy.serve.proxy.proxy import proxy
+        kwargs = convert_args(args)
+        proxy(**kwargs)

 @staticmethod
 def add_parsers():
     SubCliServe.add_parser_gradio()
     SubCliServe.add_parser_api_server()
     SubCliServe.add_parser_api_client()
+        SubCliServe.add_parser_proxy()
diff --git a/lmdeploy/serve/proxy/proxy.py b/lmdeploy/serve/proxy/proxy.py
index fbf3e8471..5f05930bd 100644
--- a/lmdeploy/serve/proxy/proxy.py
+++ b/lmdeploy/serve/proxy/proxy.py
@@ -512,7 +512,7 @@ async def completions_v1(request: CompletionRequest,

 def proxy(server_name: str = '0.0.0.0',
-          server_port: int = 10086,
+          server_port: int = 8000,
           strategy: Literal['random', 'min_expected_latency',
                             'min_observed_latency'] = 'min_expected_latency',
           api_keys: Optional[Union[List[str], str]] = None,
@@ -522,7 +522,7 @@ def proxy(server_name: str = '0.0.0.0',
     Args:
         server_name (str): the server name of the proxy. Default to
             '0.0.0.0'.
-        server_port (str): the server port. Default to 10086.
+        server_port (int): the server port. Default to 8000.
         strategy ('random' | 'min_expected_latency' | 'min_observed_latency'):
             the strategy to dispatch requests to nodes. Default to
             'min_expected_latency'

From 7902359f8afe3bdd11fa16d87fc4d35012dde41e Mon Sep 17 00:00:00 2001
From: AllentDan
Date: Mon, 23 Sep 2024 12:06:25 +0800
Subject: [PATCH 4/5] documents

---
 docs/en/llm/proxy_server.md    | 15 ++++++---------
 docs/zh_cn/llm/proxy_server.md | 15 ++++++---------
 2 files changed, 12 insertions(+), 18 deletions(-)

diff --git a/docs/en/llm/proxy_server.md b/docs/en/llm/proxy_server.md
index be7857266..26b24f534 100644
--- a/docs/en/llm/proxy_server.md
+++ b/docs/en/llm/proxy_server.md
@@ -13,8 +13,13 @@ lmdeploy serve proxy --server-name {server_name} --server-port {server_port} --s
 After startup is successful, the URL of the proxy service will also be printed by the script. Access this URL in your browser to open the Swagger UI.
 Subsequently, when starting the `api_server` service, users can register it with the proxy service directly through the `--proxy-url` option. For example:
 `lmdeploy serve api_server InternLM/internlm2-chat-1_8b --proxy-url http://0.0.0.0:8000`.
+In this way, users can access the `api_server` services through the proxy node. The proxy node is used in exactly the same way as the `api_server`, and both are compatible with the OpenAI interface.
+
+- /v1/models
+- /v1/chat/completions
+- /v1/completions

-## API
+## Node Management

 Through Swagger UI, we can see multiple APIs. Those related to api_server node management include:

@@ -24,14 +29,6 @@ Through Swagger UI, we can see multiple APIs. Those related to api_server node m

 They respectively represent viewing all api_server service nodes, adding a certain node, and deleting a certain node.

-APIs related to usage include:
-
-- /v1/models
-- /v1/chat/completions
-- /v1/completions
-
-The usage of these APIs is the same as that of api_server.
-
 ### Node Management through curl

```shell
curl -X 'GET' \
  'http://localhost:8000/nodes/status' \
  -H 'accept: application/json'
diff --git a/docs/zh_cn/llm/proxy_server.md b/docs/zh_cn/llm/proxy_server.md
index 103867efa..960ab7a74 100644
--- a/docs/zh_cn/llm/proxy_server.md
+++ b/docs/zh_cn/llm/proxy_server.md
@@ -12,8 +12,13 @@ lmdeploy serve proxy --server-name {server_name} --server-port {server_port} --s

 启动成功后,代理服务的 URL 也会被脚本打印。浏览器访问这个 URL,可以打开 Swagger UI。
 随后,用户可以在启动 api_server 服务的时候,通过 `--proxy-url` 参数将其直接添加到代理服务中。例如:`lmdeploy serve api_server InternLM/internlm2-chat-1_8b --proxy-url http://0.0.0.0:8000`。
+这样,用户可以通过代理节点访问 api_server 的服务,代理节点的使用方式和 api_server 一模一样,都是兼容 OpenAI 的形式。
+
+- /v1/models
+- /v1/chat/completions
+- /v1/completions

-## API
+## 节点管理

 通过 Swagger UI,我们可以看到多个 API。其中,和 api_server 节点管理相关的有:

@@ -23,14 +28,6 @@

 他们分别表示,查看所有的 api_server 服务节点,增加某个节点,删除某个节点。最直接的使用方式是在浏览器里直接操作,也可以通过命令行或者 python 脚本操作。

-和使用相关的 api 有:
-
-- /v1/models
-- /v1/chat/completions
-- /v1/completions
-
-这些 API 的使用方式和 api_server 一样。

 ### 通过 command 增删查

```shell
curl -X 'GET' \
  'http://localhost:8000/nodes/status' \
  -H 'accept: application/json'

From ecb71b643cad9b51ce7e139be3bfc77b735ab9ae Mon Sep 17 00:00:00 2001
From: AllentDan
Date: Mon, 23 Sep 2024 14:13:41 +0800
Subject: [PATCH 5/5] api keys

---
 lmdeploy/cli/serve.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/lmdeploy/cli/serve.py b/lmdeploy/cli/serve.py
index 0e259fb9c..c82089b4e 100644
--- a/lmdeploy/cli/serve.py
+++ b/lmdeploy/cli/serve.py
@@ -231,11 +231,7 @@ def add_parser_proxy():
             choices=['random', 'min_expected_latency', 'min_observed_latency'],
             default='min_expected_latency',
             help='the strategy to dispatch requests to nodes')
-        parser.add_argument('--api-key',
-                            type=str,
-                            default=None,
-                            help='api key. Default to None, which means no '
-                            'api key will be used')
+        ArgumentHelper.api_keys(parser)
         ArgumentHelper.ssl(parser)

 @staticmethod
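A note on trying out the registration flow introduced in patch 1: the sketch below registers a node with the proxy by hand, mirroring the payload that `startup_event` in `api_server.py` posts to `/nodes/add`. It is a minimal sketch, assuming a proxy on localhost:8000 and an api_server on 0.0.0.0:23333; the model name is a placeholder for whatever `get_model_list()` would return on a real node.

```python
# Manually register a node with the proxy, the same way api_server's
# startup_event does. Because the status already carries a model list,
# NodeManager.add takes the force-register branch (patch 2's
# `status.models != []` check) instead of probing the node first.
import requests

proxy_url = 'http://localhost:8000'  # placeholder proxy address
node = {
    'url': 'http://0.0.0.0:23333',   # placeholder api_server address
    'status': {
        'models': ['internlm2']      # placeholder; normally get_model_list()
    },
}
response = requests.post(f'{proxy_url}/nodes/add',
                         headers={'accept': 'application/json'},
                         json=node,
                         timeout=5)
print(response.status_code, response.text)
```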
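The heartbeat added to `proxy.py` is spread across several hunks, so a self-contained sketch of the same pattern may help: a daemon thread sleeps for the expiration interval, probes every node's `/health` endpoint, and drops the unresponsive ones. The node table below is illustrative; the real proxy keeps it in `NodeManager.nodes` and persists it to a config file.

```python
# A standalone sketch of the stale-node heartbeat introduced in this PR.
import threading
import time

import requests

EXPIRATION = 90  # default of LMDEPLOY_CONTROLLER_HEART_BEAT_EXPIRATION
nodes = {'http://0.0.0.0:23333': {}}  # placeholder table: node_url -> status


def remove_stale_nodes() -> None:
    """Probe each node's /health endpoint and drop unreachable nodes."""
    for node_url in list(nodes):
        try:
            ok = requests.get(f'{node_url}/health',
                              timeout=5).status_code == 200
        except requests.RequestException:
            ok = False
        if not ok:
            nodes.pop(node_url, None)
            print(f'Removed node_url: {node_url}')


def heart_beat_controller() -> None:
    """Sleep for the interval, then sweep; runs forever as a daemon thread."""
    while True:
        time.sleep(EXPIRATION)
        remove_stale_nodes()


threading.Thread(target=heart_beat_controller, daemon=True).start()
```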
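Finally, since the reworked documents state that the proxy node is used exactly like an `api_server` and is OpenAI-compatible, a request sketch against the `/v1` endpoints may be useful. It assumes the `/v1/models` response follows the OpenAI model-list schema, as `api_server`'s implementation does; the proxy address is again a placeholder.

```python
# Hypothetical end-to-end call through the proxy's OpenAI-compatible API.
import requests

base_url = 'http://localhost:8000'  # placeholder proxy address

# pick the first model aggregated from the registered api_server nodes
model_id = requests.get(f'{base_url}/v1/models',
                        timeout=5).json()['data'][0]['id']

payload = {
    'model': model_id,
    'messages': [{'role': 'user', 'content': 'Hi, who are you?'}],
}
response = requests.post(f'{base_url}/v1/chat/completions',
                         json=payload,
                         timeout=60)
print(response.json()['choices'][0]['message']['content'])
```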