Skip to content

Commit

Permalink
Add qwen2.5-coder 0.5B 1.5B 3B 14B 32B
Browse files Browse the repository at this point in the history
  • Loading branch information
frostyplanet committed Nov 12, 2024
1 parent fe94552 commit 6057081
Show file tree
Hide file tree
Showing 2 changed files with 281 additions and 2 deletions.
133 changes: 131 additions & 2 deletions xinference/model/llm/llm_family.json
Original file line number Diff line number Diff line change
Expand Up @@ -8205,6 +8205,17 @@
],
"model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": "0_5",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-0.5B",
"model_revision": "master"
},
{
"model_format": "pytorch",
"model_size_in_billions": "1_5",
Expand All @@ -8214,7 +8225,18 @@
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-1.5B",
"model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
"model_revision": "master"
},
{
"model_format": "pytorch",
"model_size_in_billions": "3",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-3B",
"model_revision": "master"
},
{
"model_format": "pytorch",
Expand All @@ -8225,7 +8247,29 @@
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-7B",
"model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
"model_revision": "master"
},
{
"model_format": "pytorch",
"model_size_in_billions": 14,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-14B",
"model_revision": "master"
},
{
"model_format": "pytorch",
"model_size_in_billions": 32,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-32B",
"model_revision": "master"
}
]
},
Expand All @@ -8243,6 +8287,16 @@
],
"model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": "0_5",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
},
{
"model_format": "pytorch",
"model_size_in_billions": "1_5",
Expand All @@ -8253,6 +8307,16 @@
],
"model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
},
{
"model_format": "pytorch",
"model_size_in_billions": "3",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
},
{
"model_format": "pytorch",
"model_size_in_billions": 7,
Expand All @@ -8263,6 +8327,53 @@
],
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
},
{
"model_format": "pytorch",
"model_size_in_billions": 14,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
},
{
"model_format": "pytorch",
"model_size_in_billions": 32,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
},
{
"model_format": "gptq",
"model_size_in_billions": "0_5",
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "gptq",
"model_size_in_billions": "1_5",
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "gptq",
"model_size_in_billions": "3",
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "gptq",
"model_size_in_billions": "7",
Expand All @@ -8272,6 +8383,24 @@
],
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "gptq",
"model_size_in_billions": "14",
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "gptq",
"model_size_in_billions": "32",
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "1_5",
Expand Down
150 changes: 150 additions & 0 deletions xinference/model/llm/llm_family_modelscope.json
Original file line number Diff line number Diff line change
Expand Up @@ -5907,6 +5907,18 @@
],
"model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": "0_5",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "qwen/Qwen2.5-Coder-0.5B",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
"model_size_in_billions": "1_5",
Expand All @@ -5919,6 +5931,18 @@
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
"model_size_in_billions": "3",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "qwen/Qwen2.5-Coder-3B",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
"model_size_in_billions": 7,
Expand All @@ -5930,6 +5954,30 @@
"model_id": "qwen/Qwen2.5-Coder-7B",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
"model_size_in_billions": 14,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "qwen/Qwen2.5-Coder-14B",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
"model_size_in_billions": 32,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "qwen/Qwen2.5-Coder-32B",
"model_revision": "master",
"model_hub": "modelscope"
}
]
},
Expand All @@ -5947,6 +5995,18 @@
],
"model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": "0_5",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "qwen/Qwen2.5-Coder-0.5B-Instruct",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
"model_size_in_billions": "1_5",
Expand All @@ -5958,6 +6018,17 @@
"model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
"model_size_in_billions": "3",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "qwen/Qwen2.5-Coder-3B-Instruct",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
Expand All @@ -5971,6 +6042,63 @@
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
"model_size_in_billions": 14,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "qwen/Qwen2.5-Coder-14B-Instruct",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "pytorch",
"model_size_in_billions": 32,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "qwen/Qwen2.5-Coder-32B-Instruct",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "gptq",
"model_size_in_billions": "0_5",
"quantizations": [
"Int4",
"Int8"
],
"model_id": "qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "gptq",
"model_size_in_billions": "1_5",
"quantizations": [
"Int4",
"Int8"
],
"model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "gptq",
"model_size_in_billions": 3,
"quantizations": [
"Int4",
"Int8"
],
"model_id": "qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "gptq",
"model_size_in_billions": 7,
Expand All @@ -5982,6 +6110,28 @@
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "gptq",
"model_size_in_billions": 14,
"quantizations": [
"Int4",
"Int8"
],
"model_id": "qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "gptq",
"model_size_in_billions": 32,
"quantizations": [
"Int4",
"Int8"
],
"model_id": "qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}",
"model_revision": "master",
"model_hub": "modelscope"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "1_5",
Expand Down

0 comments on commit 6057081

Please sign in to comment.