[Model] Added GLM-4 series hf format model support vllm==0.6.4 (#10561)
Signed-off-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Isotr0py <2037008807@qq.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
1 parent 3ed5e73, commit 5fc5ce0
Showing 5 changed files with 30 additions and 1 deletion (only the diff for the new GLM-4 model definition is reproduced below).
New file (the GLM-4 model definition; in the vLLM tree this is vllm/model_executor/models/glm.py):

@@ -0,0 +1,21 @@
+"""Inference-only HF format GLM-4 model compatible with THUDM weights."""
+from vllm.config import VllmConfig
+from vllm.model_executor.models.llama import LlamaForCausalLM
+
+from .utils import PPMissingLayer
+
+
+class GlmForCausalLM(LlamaForCausalLM):
+
+    def __init__(self, *, vllm_config: VllmConfig, prefix: str = ""):
+        super().__init__(vllm_config=vllm_config, prefix=prefix)
+        # Hack Llama model to fit HF format GLM implementation
+        # Attention difference between GLM and Llama:
+        # 1. Half partial rotary_dim and no Neox style.
+        # 2. There is no bias for o_proj in attention
+        for layer in self.model.layers:
+            if not isinstance(layer, PPMissingLayer):
+                layer.self_attn.rotary_emb.rotary_dim //= 2
+                layer.self_attn.rotary_emb.is_neox_style = False
+                layer.self_attn.o_proj.bias = None
+                layer.self_attn.o_proj.skip_bias_add = True
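
For readers puzzled by "no Neox style" in the comments above, here is a minimal pure-PyTorch sketch (an illustration, not vLLM code) of the two rotate-half layouts that the `is_neox_style` flag toggles between:

import torch

def rotate_neox(x: torch.Tensor) -> torch.Tensor:
    # Neox layout: element i is paired with element i + d/2.
    x1, x2 = x.chunk(2, dim=-1)
    return torch.cat((-x2, x1), dim=-1)

def rotate_interleaved(x: torch.Tensor) -> torch.Tensor:
    # Non-Neox (GPT-J / GLM) layout: adjacent elements (2i, 2i+1) are paired.
    x1, x2 = x[..., ::2], x[..., 1::2]
    return torch.stack((-x2, x1), dim=-1).flatten(-2)

# Either helper is used the same way when applying RoPE:
#   x_rotated = x * cos + rotate(x) * sin
# GLM additionally rotates only half of each head's dimensions (hence
# `rotary_dim //= 2` in the patch above); the rest passes through unrotated.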
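
With this commit in place (vllm==0.6.4 per the title), an HF-format GLM-4 checkpoint should load through vLLM's usual offline API. A minimal usage sketch; the model ID below is an assumption, substitute whichever HF-format GLM-4 checkpoint you actually serve:

from vllm import LLM, SamplingParams

# Assumed model ID: any HF-format GLM-4 checkpoint that maps to
# GlmForCausalLM should load the same way.
llm = LLM(model="THUDM/glm-4-9b-chat-hf")
params = SamplingParams(temperature=0.7, max_tokens=64)

for output in llm.generate(["Briefly explain rotary position embeddings."], params):
    print(output.outputs[0].text)

Design note: rather than duplicating a full model definition, the commit subclasses LlamaForCausalLM and patches the handful of attention details where GLM diverges, which keeps the new file to 21 lines.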