From 6272452b8c259f1444dda7d0fe68b08dbcb22d83 Mon Sep 17 00:00:00 2001
From: lvhan028
Date: Fri, 15 Nov 2024 18:09:55 +0800
Subject: [PATCH] fix wrong head_dim for mistral-nemo

---
 lmdeploy/turbomind/deploy/source_model/llama.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/lmdeploy/turbomind/deploy/source_model/llama.py b/lmdeploy/turbomind/deploy/source_model/llama.py
index a8aa51b14..0c702d658 100644
--- a/lmdeploy/turbomind/deploy/source_model/llama.py
+++ b/lmdeploy/turbomind/deploy/source_model/llama.py
@@ -153,6 +153,7 @@ def model_info(self):
             max_position_embeddings = int(
                 model_arg.get('max_position_embeddings', 0))
             rope_scaling = model_arg.get('rope_scaling', None)
+            head_dim = model_arg.get('head_dim', hidden_units // attn_head_num)
             scaling_factor = 0.0
             use_dynamic_ntk = 0
             scaling_type = ''
@@ -189,7 +190,7 @@ def model_info(self):
                     beta_slow = rope_scaling.get('beta_slow', 1.0)
 
         return dict(
-            size_per_head=hidden_units // attn_head_num,
+            size_per_head=head_dim,
             rotary_embedding=hidden_units // attn_head_num,
             num_layer=num_layer,
             norm_eps=norm_eps,
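
Context for the change: Mistral-Nemo's config.json declares an explicit head_dim that differs from hidden_size // num_attention_heads, so deriving size_per_head from hidden_units alone produces the wrong value for that model. Below is a minimal standalone sketch of the before/after behaviour; the config numbers are illustrative values typical of a Mistral-Nemo-12B checkpoint, and model_arg simply stands in for the parsed config.json.

    # Sketch only: model_arg stands in for the parsed config.json of a
    # Mistral-Nemo-style checkpoint; the numbers below are illustrative.
    model_arg = {
        'hidden_size': 5120,
        'num_attention_heads': 32,
        'head_dim': 128,  # explicit per-head dim, not hidden_size // num_heads
    }

    hidden_units = model_arg['hidden_size']
    attn_head_num = model_arg['num_attention_heads']

    # Old behaviour: size_per_head was always derived from hidden_size.
    old_size_per_head = hidden_units // attn_head_num  # 160 -> wrong for Nemo

    # Patched behaviour: prefer the explicit 'head_dim', fall back otherwise.
    head_dim = model_arg.get('head_dim', hidden_units // attn_head_num)  # 128

    print(old_size_per_head, head_dim)  # prints: 160 128

For models whose config.json has no 'head_dim' field, the fallback keeps the previous behaviour unchanged.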