diff --git a/xinference/model/llm/llm_family.json b/xinference/model/llm/llm_family.json
index d8c2f4aa29..472f35b6eb 100644
--- a/xinference/model/llm/llm_family.json
+++ b/xinference/model/llm/llm_family.json
@@ -8205,6 +8205,16 @@
     ],
     "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
@@ -8213,8 +8223,17 @@
           "8-bit",
           "none"
         ],
-        "model_id": "Qwen/Qwen2.5-Coder-1.5B",
-        "model_revision": "d3586cfe793730945f8e4d7ef31032a3ee50247d"
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B"
       },
       {
         "model_format": "pytorch",
@@ -8224,8 +8243,27 @@
           "8-bit",
           "none"
         ],
-        "model_id": "Qwen/Qwen2.5-Coder-7B",
-        "model_revision": "30b6a7e874a78d46b80fa1db3194ea427dd41b08"
+        "model_id": "Qwen/Qwen2.5-Coder-7B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B"
       }
     ]
   },
@@ -8243,6 +8281,16 @@
     ],
     "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
@@ -8253,6 +8301,16 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 7,
@@ -8263,6 +8321,53 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+            "Int4",
+            "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+            "Int4",
+            "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "3",
+        "quantizations": [
+            "Int4",
+            "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}"
+      },
       {
         "model_format": "gptq",
         "model_size_in_billions": "7",
@@ -8272,6 +8377,73 @@
         ],
         "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-GPTQ-{quantization}"
       },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "14",
+        "quantizations": [
+            "Int4",
+            "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "32",
+        "quantizations": [
+            "Int4",
+            "Int8"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+            "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+            "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "3",
+        "quantizations": [
+            "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-3B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "7",
+        "quantizations": [
+            "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-7B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "14",
+        "quantizations": [
+            "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-14B-Instruct-AWQ"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "32",
+        "quantizations": [
+            "Int4"
+        ],
+        "model_id": "Qwen/Qwen2.5-Coder-32B-Instruct-AWQ"
+      },
+
       {
         "model_format": "ggufv2",
         "model_size_in_billions": "1_5",
diff --git a/xinference/model/llm/llm_family_modelscope.json b/xinference/model/llm/llm_family_modelscope.json
index bfd3d09a4a..f8598d3602 100644
--- a/xinference/model/llm/llm_family_modelscope.json
+++ b/xinference/model/llm/llm_family_modelscope.json
@@ -5907,6 +5907,18 @@
     ],
     "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-0.5B",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
@@ -5919,6 +5931,18 @@
         "model_revision": "master",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-3B",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": 7,
@@ -5930,6 +5954,30 @@
         "model_id": "qwen/Qwen2.5-Coder-7B",
         "model_revision": "master",
         "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-14B",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-32B",
+        "model_revision": "master",
+        "model_hub": "modelscope"
       }
     ]
   },
@@ -5947,6 +5995,18 @@
     ],
     "model_description": "Qwen2.5-Coder is the latest series of Code-Specific Qwen large language models (formerly known as CodeQwen).",
     "model_specs": [
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-0.5B-Instruct",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "pytorch",
         "model_size_in_billions": "1_5",
@@ -5958,6 +6018,17 @@
         "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct",
         "model_revision": "master",
         "model_hub": "modelscope"
+      },      {
+        "model_format": "pytorch",
+        "model_size_in_billions": "3",
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-3B-Instruct",
+        "model_revision": "master",
+        "model_hub": "modelscope"
       },
       {
         "model_format": "pytorch",
@@ -5971,6 +6042,63 @@
         "model_revision": "master",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-14B-Instruct",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "pytorch",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "4-bit",
+          "8-bit",
+          "none"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-32B-Instruct",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-0.5B-Instruct-GPTQ-{quantization}",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct-GPTQ-{quantization}",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-3B-Instruct-GPTQ-{quantization}",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
       {
         "model_format": "gptq",
         "model_size_in_billions": 7,
@@ -5982,6 +6110,89 @@
         "model_revision": "master",
         "model_hub": "modelscope"
       },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-14B-Instruct-GPTQ-{quantization}",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "gptq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int4",
+          "Int8"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-32B-Instruct-GPTQ-{quantization}",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "0_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-0.5B-Instruct-AWQ",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": "1_5",
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-1.5B-Instruct-AWQ",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 3,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-3B-Instruct-AWQ",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 7,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-7B-Instruct-AWQ",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 14,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-14B-Instruct-AWQ",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+      {
+        "model_format": "awq",
+        "model_size_in_billions": 32,
+        "quantizations": [
+          "Int4"
+        ],
+        "model_id": "qwen/Qwen2.5-Coder-32B-Instruct-AWQ",
+        "model_revision": "master",
+        "model_hub": "modelscope"
+      },
+
       {
         "model_format": "ggufv2",
         "model_size_in_billions": "1_5",