Skip to content

Commit

Permalink
add support for telechat2
Browse files Browse the repository at this point in the history
  • Loading branch information
xiayongqiang committed Jan 20, 2025
1 parent 2846042 commit fa032dd
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 0 deletions.
1 change: 1 addition & 0 deletions gptqmodel/models/_const.py
Original file line number Diff line number Diff line change
Expand Up @@ -167,6 +167,7 @@ def get_best_device(backend: BACKEND = BACKEND.AUTO) -> torch.device:
"hymba",
"olmo2",
"ovis",
"telechat",
]

EXLLAMA_DEFAULT_MAX_INPUT_LENGTH = 2048
Expand Down
2 changes: 2 additions & 0 deletions gptqmodel/models/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@
from .definitions.starcoder2 import Starcoder2GPTQ # noqa: E402
from .definitions.xverse import XverseGPTQ # noqa: E402
from .definitions.yi import YiGPTQ # noqa: E402
from .definitions.telechat2 import TeleChat2GPTQ

logger = setup_logger()

Expand Down Expand Up @@ -139,6 +140,7 @@
"hymba": HymbaGPTQ,
"olmo2": Olmo2GPTQ,
"ovis": OvisGPTQ,
"telechat":TeleChat2GPTQ,
}


Expand Down
1 change: 1 addition & 0 deletions gptqmodel/models/definitions/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,3 +59,4 @@
from .starcoder2 import Starcoder2GPTQ
from .xverse import XverseGPTQ
from .yi import YiGPTQ
from .telechat2 import TeleChat2GPTQ
23 changes: 23 additions & 0 deletions gptqmodel/models/definitions/telechat2.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from ..base import BaseGPTQModel


class TeleChat2GPTQ(BaseGPTQModel):
    """GPTQ model definition for TeleChat2.

    This class only declares class attributes: the module paths and the
    per-layer module groupings listed below. All behavior comes from
    ``BaseGPTQModel``.
    """

    # telechat2 requires custom model code
    require_trust_remote_code = True

    # NOTE(review): presumably the class name of one repeated transformer
    # block and the attribute path of the container that holds those blocks;
    # confirm against BaseGPTQModel.
    layer_type = "TelechatBlock"
    layers_node = "transformer.h"
    # NOTE(review): presumably the modules that live outside the repeated
    # blocks (word embeddings and the final layer norm, judging by the
    # names); confirm against BaseGPTQModel.
    base_modules = ["transformer.word_embeddings", "transformer.ln_f"]

    # No QKV projection is listed in ``layer_modules`` below. If other
    # frameworks are used for inference (such as vLLM), it is best not to
    # quantize QKV because of how the key/value weights are organized in the
    # TeleChat model.
    layer_modules = [
        ["self_attention.dense"],
        ["mlp.up_proj", "mlp.gate_proj"],
        ["mlp.down_proj"],
    ]

# Public names exported by a wildcard import of this module.
__all__ = ["TeleChat2GPTQ"]

0 comments on commit fa032dd

Please sign in to comment.