[Model] Added GLM-4 series hf format model support vllm==0.6.4 (#10561)
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
3 people authored Nov 28, 2024
1 parent 3ed5e73 commit 5fc5ce0
Showing 5 changed files with 30 additions and 1 deletion.
5 changes: 5 additions & 0 deletions docs/source/models/supported_models.rst
@@ -139,6 +139,11 @@ Text Generation
      - :code:`google/gemma-2-9b`, :code:`google/gemma-2-27b`, etc.
      - ✅︎
      - ✅︎
+   * - :code:`GlmForCausalLM`
+     - GLM-4
+     - :code:`THUDM/glm-4-9b-chat-hf`, etc.
+     - ✅︎
+     - ✅︎
    * - :code:`GPT2LMHeadModel`
      - GPT-2
      - :code:`gpt2`, :code:`gpt2-xl`, etc.
1 change: 1 addition & 0 deletions tests/models/registry.py
@@ -63,6 +63,7 @@ class _HfExamplesInfo:
     "FalconForCausalLM": _HfExamplesInfo("tiiuae/falcon-7b"),
     "GemmaForCausalLM": _HfExamplesInfo("google/gemma-2b"),
     "Gemma2ForCausalLM": _HfExamplesInfo("google/gemma-2-9b"),
+    "GlmForCausalLM": _HfExamplesInfo("THUDM/glm-4-9b-chat-hf"),
     "GPT2LMHeadModel": _HfExamplesInfo("gpt2"),
     "GPTBigCodeForCausalLM": _HfExamplesInfo("bigcode/starcoder"),
     "GPTJForCausalLM": _HfExamplesInfo("EleutherAI/gpt-j-6b"),
2 changes: 1 addition & 1 deletion tests/models/test_initialization.py
@@ -11,7 +11,7 @@
 
 @pytest.mark.parametrize("model_arch", HF_EXAMPLE_MODELS.get_supported_archs())
 def test_can_initialize(model_arch):
-    if (model_arch == "Idefics3ForConditionalGeneration"
+    if (model_arch in {"Idefics3ForConditionalGeneration", "GlmForCausalLM"}
             and transformers.__version__ < "4.46.0"):
         pytest.skip(reason="Model introduced in HF >= 4.46.0")
 
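A side note on the gate above: comparing transformers.__version__ as a raw string works for these particular versions but is lexicographic, not semantic (e.g. "4.5.0" sorts above "4.46.0"). A more robust standalone guard, sketched with the packaging library — an assumption for illustration, not what the test itself uses:

import transformers
from packaging.version import Version

# Hypothetical guard mirroring the test's skip condition:
# hf-format GLM-4 requires transformers >= 4.46.0.
if Version(transformers.__version__) < Version("4.46.0"):
    raise RuntimeError("GLM-4 hf-format support requires transformers >= 4.46.0")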
21 changes: 21 additions & 0 deletions vllm/model_executor/models/glm.py
@@ -0,0 +1,21 @@
"""Inference-only HF format GLM-4 model compatible with THUDM weights."""
from vllm.config import VllmConfig
from vllm.model_executor.models.llama import LlamaForCausalLM

from .utils import PPMissingLayer


class GlmForCausalLM(LlamaForCausalLM):

def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
super().__init__(vllm_config=vllm_config, prefix=prefix)
# Hack Llama model to fit HF format GLM implementation
# Attention difference between GLM and Llama:
# 1. Half partial rotary_dim and no Neox style.
# 2. There is no bias for o_proj in attention
for layer in self.model.layers:
if not isinstance(layer, PPMissingLayer):
layer.self_attn.rotary_emb.rotary_dim //= 2
layer.self_attn.rotary_emb.is_neox_style = False
layer.self_attn.o_proj.bias = None
layer.self_attn.o_proj.skip_bias_add = True
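Once the architecture is registered (see registry.py below), the model loads through vLLM's standard entry points. A minimal offline-inference sketch; the prompt and sampling settings are illustrative:

from vllm import LLM, SamplingParams

# vLLM reads the GlmForCausalLM architecture from the checkpoint's
# config.json and dispatches to vllm/model_executor/models/glm.py.
llm = LLM(model="THUDM/glm-4-9b-chat-hf")
params = SamplingParams(temperature=0.8, max_tokens=64)
outputs = llm.generate(["Briefly introduce the GLM-4 model family."], params)
print(outputs[0].outputs[0].text)

Reusing LlamaForCausalLM and patching the per-layer attention modules keeps the new file to 21 lines, since GLM-4's hf-format checkpoints are otherwise Llama-shaped.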
2 changes: 2 additions & 0 deletions vllm/model_executor/models/registry.py
@@ -48,6 +48,7 @@
     "FalconForCausalLM": ("falcon", "FalconForCausalLM"),
     "GemmaForCausalLM": ("gemma", "GemmaForCausalLM"),
     "Gemma2ForCausalLM": ("gemma2", "Gemma2ForCausalLM"),
+    "GlmForCausalLM": ("glm", "GlmForCausalLM"),
     "GPT2LMHeadModel": ("gpt2", "GPT2LMHeadModel"),
     "GPTBigCodeForCausalLM": ("gpt_bigcode", "GPTBigCodeForCausalLM"),
     "GPTJForCausalLM": ("gpt_j", "GPTJForCausalLM"),
@@ -107,6 +108,7 @@
     "XLMRobertaModel": ("roberta", "RobertaEmbeddingModel"),
     "DeciLMForCausalLM": ("decilm", "DeciLMForCausalLM"),
     "Gemma2Model": ("gemma2", "Gemma2EmbeddingModel"),
+    "GlmForCausalLM": ("glm", "GlmForCausalLM"),
     "LlamaModel": ("llama", "LlamaEmbeddingModel"),
     **{
         # Multiple models share the same architecture, so we include them all
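With both registry tables updated (text generation and embedding), the new architecture is discoverable at runtime. A quick sanity check, assuming a vLLM build that includes this commit:

from vllm import ModelRegistry

# The architecture added above should now appear in the supported list.
assert "GlmForCausalLM" in ModelRegistry.get_supported_archs()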
