Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[MODEL] Add Telechat2 (China Telecom) #1106

Merged
merged 3 commits into from
Jan 20, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gptqmodel/models/_const.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def get_best_device(backend: BACKEND = BACKEND.AUTO) -> torch.device:
"hymba",
"olmo2",
"ovis",
"telechat",
]

EXLLAMA_DEFAULT_MAX_INPUT_LENGTH = 2048
Expand Down
2 changes: 2 additions & 0 deletions gptqmodel/models/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
from .definitions.starcoder2 import Starcoder2GPTQ # noqa: E402
from .definitions.xverse import XverseGPTQ # noqa: E402
from .definitions.yi import YiGPTQ # noqa: E402
from .definitions.telechat2 import TeleChat2GPTQ

logger = setup_logger()

Expand Down Expand Up @@ -139,6 +140,7 @@
"hymba": HymbaGPTQ,
"olmo2": Olmo2GPTQ,
"ovis": OvisGPTQ,
"telechat":TeleChat2GPTQ,
}


Expand Down
1 change: 1 addition & 0 deletions gptqmodel/models/definitions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@
from .starcoder2 import Starcoder2GPTQ
from .xverse import XverseGPTQ
from .yi import YiGPTQ
from .telechat2 import TeleChat2GPTQ
26 changes: 26 additions & 0 deletions gptqmodel/models/definitions/telechat2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
import torch

from ..base import BaseGPTQModel


class TeleChat2GPTQ(BaseGPTQModel):
    """GPTQ model definition for China Telecom's TeleChat2 family."""

    # telechat2 ships custom modeling code on the Hub, so loading it
    # requires trust_remote_code=True
    require_trust_remote_code = True
    # telechat2 requires float16
    require_dtype = torch.float16

    # class name of one repeating decoder block in the HF model
    layer_type = "TelechatBlock"
    # dotted path to the list of decoder blocks
    layers_node = "transformer.h"
    # non-repeating modules kept outside the quantized layers
    # (token embeddings and final layer norm)
    base_modules = ["transformer.word_embeddings", "transformer.ln_f"]

    # NOTE: the QKV projections are deliberately NOT quantized. TeleChat
    # packs its key/value weights in a layout that other inference
    # frameworks (e.g. vLLM) cannot consume once quantized, so only the
    # attention output projection and the MLP are listed here.
    layer_modules = [
        ["self_attention.dense"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]


__all__ = ["TeleChat2GPTQ"]