Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MODEL] Add Telechat2 (China Telecom) #1106

Merged
merged 3 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gptqmodel/models/_const.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def get_best_device(backend: BACKEND = BACKEND.AUTO) -> torch.device:
"hymba",
"olmo2",
"ovis",
"telechat",
]

EXLLAMA_DEFAULT_MAX_INPUT_LENGTH = 2048
Expand Down
2 changes: 2 additions & 0 deletions gptqmodel/models/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
from .definitions.starcoder2 import Starcoder2GPTQ # noqa: E402
from .definitions.xverse import XverseGPTQ # noqa: E402
from .definitions.yi import YiGPTQ # noqa: E402
from .definitions.telechat2 import TeleChat2GPTQ

logger = setup_logger()

Expand Down Expand Up @@ -139,6 +140,7 @@
"hymba": HymbaGPTQ,
"olmo2": Olmo2GPTQ,
"ovis": OvisGPTQ,
"telechat":TeleChat2GPTQ,
}


Expand Down
1 change: 1 addition & 0 deletions gptqmodel/models/definitions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@
from .starcoder2 import Starcoder2GPTQ
from .xverse import XverseGPTQ
from .yi import YiGPTQ
from .telechat2 import TeleChat2GPTQ
26 changes: 26 additions & 0 deletions gptqmodel/models/definitions/telechat2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import torch

from ..base import BaseGPTQModel


class TeleChat2GPTQ(BaseGPTQModel):
    """GPTQ model definition for China Telecom's TeleChat2 family."""

    # telechat2 ships custom modeling code on the Hub, so loading it
    # requires trust_remote_code=True
    require_trust_remote_code = True
    # telechat2 requires float16
    require_dtype = torch.float16

    # class name of one repeating decoder block in the HF model
    layer_type = "TelechatBlock"
    # dotted path to the list of decoder blocks
    layers_node = "transformer.h"
    # non-repeating modules kept outside the quantized layers
    # (token embeddings and final layer norm)
    base_modules = ["transformer.word_embeddings", "transformer.ln_f"]

    # NOTE: the QKV projections are deliberately NOT quantized. TeleChat
    # packs its key/value weights in a layout that other inference
    # frameworks (e.g. vLLM) cannot consume once quantized, so only the
    # attention output projection and the MLP are listed here.
    layer_modules = [
        ["self_attention.dense"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]


__all__ = ["TeleChat2GPTQ"]