From 12ae107986b2bbf853ffca0f45a2edf85a7f4d35 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dr=2E=20Artificial=E6=9B=BE=E5=B0=8F=E5=81=A5?= <875100501@qq.com> Date: Thu, 25 Jul 2024 16:15:02 +0800 Subject: [PATCH 1/4] Update README.md Qwen not QWen --- README.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README.md b/README.md index 5fd06c985..44543c413 100644 --- a/README.md +++ b/README.md @@ -115,10 +115,10 @@ For detailed inference benchmarks in more devices and more settings, please refe
  • InternLM (7B - 20B)
  • InternLM2 (7B - 20B)
  • InternLM2.5 (7B)
  • -
  • QWen (1.8B - 72B)
  • -
  • QWen1.5 (0.5B - 110B)
  • -
  • QWen1.5 - MoE (0.5B - 72B)
  • -
  • QWen2 (0.5B - 72B)
  • +
  • Qwen (1.8B - 72B)
  • +
  • Qwen1.5 (0.5B - 110B)
  • +
  • Qwen1.5 - MoE (A2.7B)
  • +
  • Qwen2 (0.5B - 72B)
  • Baichuan (7B)
  • Baichuan2 (7B-13B)
  • Code Llama (7B - 34B)
  • @@ -142,7 +142,7 @@ For detailed inference benchmarks in more devices and more settings, please refe
  • LLaVA(1.5,1.6) (7B-34B)
  • InternLM-XComposer2 (7B, 4khd-7B)
  • InternLM-XComposer2.5 (7B)
  • -
  • QWen-VL (7B)
  • +
  • Qwen-VL (7B)
  • DeepSeek-VL (7B)
  • InternVL-Chat (v1.1-v1.5)
  • InternVL2 (1B-40B)
  • From 3ba4f8c7297b17e5100436413c68933a934f874b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dr=2E=20Artificial=E6=9B=BE=E5=B0=8F=E5=81=A5?= <875100501@qq.com> Date: Sat, 27 Jul 2024 11:45:43 +0800 Subject: [PATCH 2/4] Update README_zh-CN.md --- README_zh-CN.md | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/README_zh-CN.md b/README_zh-CN.md index e6b02d70f..144b2c970 100644 --- a/README_zh-CN.md +++ b/README_zh-CN.md @@ -116,10 +116,10 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
  • InternLM (7B - 20B)
  • InternLM2 (7B - 20B)
  • InternLM2.5 (7B)
  • -
  • QWen (1.8B - 72B)
  • -
  • QWen1.5 (0.5B - 110B)
  • -
  • QWen1.5 - MoE (0.5B - 72B)
  • -
  • QWen2 (0.5B - 72B)
  • +
  • Qwen (1.8B - 72B)
  • +
  • Qwen1.5 (0.5B - 110B)
  • +
  • Qwen1.5 - MoE (A2.7B)
  • +
  • Qwen2 (0.5B - 72B)
  • Baichuan (7B)
  • Baichuan2 (7B-13B)
  • Code Llama (7B - 34B)
  • @@ -143,7 +143,7 @@ LMDeploy TurboMind 引擎拥有卓越的推理能力,在各种规模的模型
  • LLaVA(1.5,1.6) (7B-34B)
  • InternLM-XComposer2 (7B, 4khd-7B)
  • InternLM-XComposer2.5 (7B)
  • -
  • QWen-VL (7B)
  • +
  • Qwen-VL (7B)
  • DeepSeek-VL (7B)
  • InternVL-Chat (v1.1-v1.5)
  • InternVL2 (1B-40B)
  • From 0585c00d1a6cab25c020bbd3fd423979d0976992 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dr=2E=20Artificial=E6=9B=BE=E5=B0=8F=E5=81=A5?= <875100501@qq.com> Date: Sat, 27 Jul 2024 12:07:21 +0800 Subject: [PATCH 3/4] Update supported_models.md --- .../zh_cn/supported_models/supported_models.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/zh_cn/supported_models/supported_models.md b/docs/zh_cn/supported_models/supported_models.md index ab4692fe1..a42a8ddfb 100644 --- a/docs/zh_cn/supported_models/supported_models.md +++ b/docs/zh_cn/supported_models/supported_models.md @@ -14,11 +14,11 @@ | InternLM-XComposer | 7B | Yes | Yes | Yes | Yes | | InternLM-XComposer2 | 7B, 4khd-7B | Yes | Yes | Yes | Yes | | InternLM-XComposer2.5 | 7B | Yes | Yes | Yes | Yes | -| QWen | 1.8B - 72B | Yes | Yes | Yes | Yes | -| QWen1.5 | 1.8B - 110B | Yes | Yes | Yes | Yes | -| QWen2 | 1.5B - 72B | Yes | Yes | Yes | Yes | +| Qwen | 1.8B - 72B | Yes | Yes | Yes | Yes | +| Qwen1.5 | 1.8B - 110B | Yes | Yes | Yes | Yes | +| Qwen2 | 1.5B - 72B | Yes | Yes | Yes | Yes | | Mistral | 7B | Yes | Yes | Yes | No | -| QWen-VL | 7B | Yes | Yes | Yes | Yes | +| Qwen-VL | 7B | Yes | Yes | Yes | Yes | | DeepSeek-VL | 7B | Yes | Yes | Yes | Yes | | Baichuan | 7B | Yes | Yes | Yes | Yes | | Baichuan2 | 7B | Yes | Yes | Yes | Yes | @@ -35,7 +35,7 @@ “-” 表示还没有验证。 ```{note} -turbomind 引擎不支持 window attention。所以,对于应用了 window attention,并开启了对应的开关"use_sliding_window"的模型,比如 Mistral、QWen1.5 等,在推理时,请选择 pytorch engine +turbomind 引擎不支持 window attention。所以,对于应用了 window attention,并开启了对应的开关"use_sliding_window"的模型,比如 Mistral、Qwen1.5 等,在推理时,请选择 pytorch engine ``` ### PyTorch 支持的模型 @@ -55,10 +55,10 @@ turbomind 引擎不支持 window attention。所以,对于应用了 window att | YI | 6B - 34B | Yes | No | No | | Mistral | 7B | Yes | No | No | | Mixtral | 8x7B | Yes | No | No | -| QWen | 1.8B - 72B | Yes | No | No | -| QWen1.5 | 0.5B - 110B | Yes | No | No | -| QWen2 | 0.5B - 72B | Yes | No | No | -| 
QWen1.5-MoE | A2.7B | Yes | No | No | +| Qwen | 1.8B - 72B | Yes | No | No | +| Qwen1.5 | 0.5B - 110B | Yes | No | No | +| Qwen2 | 0.5B - 72B | Yes | No | No | +| Qwen1.5-MoE | A2.7B | Yes | No | No | | DeepSeek-MoE | 16B | Yes | No | No | | DeepSeek-V2 | 16B, 236B | Yes | No | No | | Gemma | 2B-7B | Yes | No | No | From d2633cdda6f140d05b35e0fa8115c91c5fa27a93 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Dr=2E=20Artificial=E6=9B=BE=E5=B0=8F=E5=81=A5?= <875100501@qq.com> Date: Sat, 27 Jul 2024 12:10:02 +0800 Subject: [PATCH 4/4] Update supported_models.md --- docs/en/supported_models/supported_models.md | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/docs/en/supported_models/supported_models.md b/docs/en/supported_models/supported_models.md index c3ff20625..a08a1aa00 100644 --- a/docs/en/supported_models/supported_models.md +++ b/docs/en/supported_models/supported_models.md @@ -14,11 +14,11 @@ | InternLM-XComposer | 7B | Yes | Yes | Yes | Yes | | InternLM-XComposer2 | 7B, 4khd-7B | Yes | Yes | Yes | Yes | | InternLM-XComposer2.5 | 7B | Yes | Yes | Yes | Yes | -| QWen | 1.8B - 72B | Yes | Yes | Yes | Yes | -| QWen1.5 | 1.8B - 110B | Yes | Yes | Yes | Yes | -| QWen2 | 1.5B - 72B | Yes | Yes | Yes | Yes | +| Qwen | 1.8B - 72B | Yes | Yes | Yes | Yes | +| Qwen1.5 | 1.8B - 110B | Yes | Yes | Yes | Yes | +| Qwen2 | 1.5B - 72B | Yes | Yes | Yes | Yes | | Mistral | 7B | Yes | Yes | Yes | No | -| QWen-VL | 7B | Yes | Yes | Yes | Yes | +| Qwen-VL | 7B | Yes | Yes | Yes | Yes | | DeepSeek-VL | 7B | Yes | Yes | Yes | Yes | | Baichuan | 7B | Yes | Yes | Yes | Yes | | Baichuan2 | 7B | Yes | Yes | Yes | Yes | @@ -35,7 +35,7 @@ "-" means not verified yet. ```{note} -The TurboMind engine doesn't support window attention. Therefore, for models that have applied window attention and have the corresponding switch "use_sliding_window" enabled, such as Mistral, QWen1.5 and etc., please choose the PyTorch engine for inference. 
+The TurboMind engine doesn't support window attention. Therefore, for models that have applied window attention and have the corresponding switch "use_sliding_window" enabled, such as Mistral and Qwen1.5, please choose the PyTorch engine for inference. ``` ## Models supported by PyTorch @@ -55,10 +55,10 @@ The TurboMind engine doesn't support window attention. Therefore, for models tha | YI | 6B - 34B | Yes | No | No | | Mistral | 7B | Yes | No | No | | Mixtral | 8x7B | Yes | No | No | -| QWen | 1.8B - 72B | Yes | No | No | -| QWen1.5 | 0.5B - 110B | Yes | No | No | -| QWen1.5-MoE | A2.7B | Yes | No | No | -| QWen2 | 0.5B - 72B | Yes | No | No | +| Qwen | 1.8B - 72B | Yes | No | No | +| Qwen1.5 | 0.5B - 110B | Yes | No | No | +| Qwen1.5-MoE | A2.7B | Yes | No | No | +| Qwen2 | 0.5B - 72B | Yes | No | No | | DeepSeek-MoE | 16B | Yes | No | No | | DeepSeek-V2 | 16B, 236B | Yes | No | No | | Gemma | 2B-7B | Yes | No | No |