[MODEL] Add Telechat2 (China Telecom) (#1106)

* add support for telechat2
* Update telechat2.py
* Update auto.py

---------

Co-authored-by: xiayongqiang <xiayq1@chinatelecom.cn>
Co-authored-by: LRL-ModelCloud <165116337+LRL-ModelCloud@users.noreply.github.com>
Co-authored-by: Qubitium-ModelCloud <qubitium@modelcloud.ai>
ModelCloud · Jan 20, 2025 · 23603f6 · 23603f6
1 parent e0ad9eb
commit 23603f6
Show file tree

Hide file tree

Showing 4 changed files with 31 additions and 0 deletions.
diff --git a/gptqmodel/models/_const.py b/gptqmodel/models/_const.py
@@ -167,6 +167,7 @@ def get_best_device(backend: BACKEND = BACKEND.AUTO) -> torch.device:
     "hymba",
     "olmo2",
     "ovis",
+    "telechat",
 ]
 
 EXLLAMA_DEFAULT_MAX_INPUT_LENGTH = 2048

diff --git a/gptqmodel/models/auto.py b/gptqmodel/models/auto.py
@@ -84,9 +84,11 @@
 from .definitions.rw import RWGPTQ  # noqa: E402
 from .definitions.stablelmepoch import StableLMEpochGPTQ  # noqa: E402
 from .definitions.starcoder2 import Starcoder2GPTQ  # noqa: E402
+from .definitions.telechat2 import TeleChat2GPTQ
 from .definitions.xverse import XverseGPTQ  # noqa: E402
 from .definitions.yi import YiGPTQ  # noqa: E402
 
+
 logger = setup_logger()
 
 MODEL_MAP = {
@@ -139,6 +141,7 @@
     "hymba": HymbaGPTQ,
     "olmo2": Olmo2GPTQ,
     "ovis": OvisGPTQ,
+    "telechat": TeleChat2GPTQ,
 }
 
 

diff --git a/gptqmodel/models/definitions/__init__.py b/gptqmodel/models/definitions/__init__.py
@@ -59,3 +59,4 @@
 from .starcoder2 import Starcoder2GPTQ
 from .xverse import XverseGPTQ
 from .yi import YiGPTQ
+from .telechat2 import TeleChat2GPTQ
diff --git a/gptqmodel/models/definitions/telechat2.py b/gptqmodel/models/definitions/telechat2.py
@@ -0,0 +1,26 @@
+from ..base import BaseGPTQModel
+import torch
+
+
+class TeleChat2GPTQ(BaseGPTQModel):
+    """GPTQ model definition for TeleChat2 (China Telecom)."""
+
+    # telechat2 requires custom model code
+    require_trust_remote_code = True
+    # telechat2 requires float16
+    require_dtype = torch.float16
+
+    layer_type = "TelechatBlock"
+    layers_node = "transformer.h"
+    base_modules = ["transformer.word_embeddings", "transformer.ln_f"]
+
+    # NOTE: the QKV projection is intentionally absent from layer_modules.
+    # Because of how Telechat organizes its key/value weights, it is best not
+    # to quantize QKV if other frameworks (such as vLLM) run the inference.
+    layer_modules = [
+        ["self_attention.dense"],
+        ["mlp.up_proj", "mlp.gate_proj"],
+        ["mlp.down_proj"],
+    ]
+
+__all__ = ["TeleChat2GPTQ"]