Skip to content

Commit

Permalink
[MODEL] Add Telechat2 (China Telecom) (#1106)
Browse files Browse the repository at this point in the history
* add support for telechat2

* Update telechat2.py

* Update auto.py

---------

Co-authored-by: xiayongqiang <xiayq1@chinatelecom.cn>
Co-authored-by: LRL-ModelCloud <165116337+LRL-ModelCloud@users.noreply.github.com>
Co-authored-by: Qubitium-ModelCloud <qubitium@modelcloud.ai>
  • Loading branch information
4 people authored Jan 20, 2025
1 parent e0ad9eb commit 23603f6
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 0 deletions.
1 change: 1 addition & 0 deletions gptqmodel/models/_const.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def get_best_device(backend: BACKEND = BACKEND.AUTO) -> torch.device:
"hymba",
"olmo2",
"ovis",
"telechat",
]

EXLLAMA_DEFAULT_MAX_INPUT_LENGTH = 2048
Expand Down
3 changes: 3 additions & 0 deletions gptqmodel/models/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,9 +84,11 @@
from .definitions.rw import RWGPTQ # noqa: E402
from .definitions.stablelmepoch import StableLMEpochGPTQ # noqa: E402
from .definitions.starcoder2 import Starcoder2GPTQ # noqa: E402
from .definitions.telechat2 import TeleChat2GPTQ
from .definitions.xverse import XverseGPTQ # noqa: E402
from .definitions.yi import YiGPTQ # noqa: E402


logger = setup_logger()

MODEL_MAP = {
Expand Down Expand Up @@ -139,6 +141,7 @@
"hymba": HymbaGPTQ,
"olmo2": Olmo2GPTQ,
"ovis": OvisGPTQ,
"telechat": TeleChat2GPTQ,
}


Expand Down
1 change: 1 addition & 0 deletions gptqmodel/models/definitions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@
from .starcoder2 import Starcoder2GPTQ
from .xverse import XverseGPTQ
from .yi import YiGPTQ
from .telechat2 import TeleChat2GPTQ
26 changes: 26 additions & 0 deletions gptqmodel/models/definitions/telechat2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
from ..base import BaseGPTQModel
import torch


class TeleChat2GPTQ(BaseGPTQModel):
    """GPTQ quantization definition for TeleChat2 (China Telecom).

    Maps the TeleChat2 model layout onto the generic ``BaseGPTQModel``
    machinery: which module class forms the repeating decoder block, where
    the block list lives, and which sub-modules get quantized.
    """

    # TeleChat2 ships custom modeling code, so loading it requires
    # trust_remote_code=True.
    require_trust_remote_code = True
    # TeleChat2 requires float16 weights.
    require_dtype = torch.float16

    # Class name of the repeating decoder block in the remote modeling code.
    layer_type = "TelechatBlock"
    # Attribute path to the list of decoder blocks.
    layers_node = "transformer.h"
    # Modules outside the decoder stack that are loaded but not quantized.
    base_modules = ["transformer.word_embeddings", "transformer.ln_f"]

    # NOTE: the QKV projection is deliberately NOT listed below. When other
    # frameworks (such as vLLM) are used for inference, it is best not to
    # quantize QKV due to the organization of key/value weights in the
    # TeleChat model.
    layer_modules = [
        ["self_attention.dense"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]


__all__ = ["TeleChat2GPTQ"]

0 comments on commit 23603f6

Please sign in to comment.