Skip to content

Commit

Permalink
FEAT: Add qwen2.5-coder 0.5B 1.5B 3B 14B 32B (#2543)
Browse files Browse the repository at this point in the history
  • Loading branch information
frostyplanet authored Nov 13, 2024
1 parent fe94552 commit 38728b6
Show file tree
Hide file tree
Showing 2 changed files with 387 additions and 4 deletions.
180 changes: 176 additions & 4 deletions xinference/model/llm/llm_family.json
Original file line number Diff line number Diff line change
Expand Up @@ -8205,6 +8205,16 @@
],
"model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": "0_5",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-0.5B"
},
{
"model_format": "pytorch",
"model_size_in_billions": "1_5",
Expand All @@ -8213,8 +8223,17 @@
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-1.5B",
"model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
"model_id": "Qwen/Qwen2.5-Coder-1.5B"
},
{
"model_format": "pytorch",
"model_size_in_billions": 3,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-3B"
},
{
"model_format": "pytorch",
Expand All @@ -8224,8 +8243,27 @@
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-7B",
"model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
"model_id": "Qwen/Qwen2.5-Coder-7B"
},
{
"model_format": "pytorch",
"model_size_in_billions": 14,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-14B"
},
{
"model_format": "pytorch",
"model_size_in_billions": 32,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-32B"
}
]
},
Expand All @@ -8243,6 +8281,16 @@
],
"model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": "0_5",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
},
{
"model_format": "pytorch",
"model_size_in_billions": "1_5",
Expand All @@ -8253,6 +8301,16 @@
],
"model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
},
{
"model_format": "pytorch",
"model_size_in_billions": 3,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
},
{
"model_format": "pytorch",
"model_size_in_billions": 7,
Expand All @@ -8263,6 +8321,53 @@
],
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
},
{
"model_format": "pytorch",
"model_size_in_billions": 14,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
},
{
"model_format": "pytorch",
"model_size_in_billions": 32,
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
},
{
"model_format": "gptq",
"model_size_in_billions": "0_5",
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "gptq",
"model_size_in_billions": "1_5",
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "gptq",
"model_size_in_billions": 3,
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "gptq",
"model_size_in_billions": "7",
Expand All @@ -8272,6 +8377,73 @@
],
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "gptq",
"model_size_in_billions": 14,
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "gptq",
"model_size_in_billions": 32,
"quantizations": [
"Int4",
"Int8"
],
"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
},
{
"model_format": "awq",
"model_size_in_billions": "0_5",
"quantizations": [
"Int4"
],
"model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ"
},
{
"model_format": "awq",
"model_size_in_billions": "1_5",
"quantizations": [
"Int4"
],
"model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ"
},
{
"model_format": "awq",
"model_size_in_billions": 3,
"quantizations": [
"Int4"
],
"model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-AWQ"
},
{
"model_format": "awq",
"model_size_in_billions": 7,
"quantizations": [
"Int4"
],
"model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
},
{
"model_format": "awq",
"model_size_in_billions": 14,
"quantizations": [
"Int4"
],
"model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-AWQ"
},
{
"model_format": "awq",
"model_size_in_billions": 32,
"quantizations": [
"Int4"
],
"model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "1_5",
Expand Down
Loading

0 comments on commit 38728b6

Please sign in to comment.