[FP8 KV Cache, Mixtral] Avoid KeyError at loading pre-quantized FP8 m…
HaiShaw authored Oct 29, 2024
1 parent d04899d commit 54dd3ea
Showing 1 changed file with 3 additions and 0 deletions.
3 changes: 3 additions & 0 deletions python/sglang/srt/models/mixtral.py
@@ -369,6 +369,9 @@ def load_weights(self, weights: Iterable[Tuple[str, torch.Tensor]]):
                 # Skip loading extra bias for GPTQ models.
                 if name.endswith(".bias") and name not in params_dict:
                     continue
+                # Skip loading kv_scale from checkpoints; it is handled by the new design.
+                if name.endswith(".kv_scale") and name not in params_dict:
+                    continue
                 if name is None:
                     continue
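The guard above matters because indexing `params_dict` with a checkpoint-only key raises `KeyError`. Pre-quantized FP8 checkpoints can carry per-layer `kv_scale` tensors that have no matching model parameter, so they must be filtered out before the lookup. A minimal standalone sketch (hypothetical names, not the actual sglang loader) of the pattern:

```python
# Sketch of the skip-unmatched-keys pattern used in load_weights.
# params_dict maps parameter names to parameter objects; checkpoint
# weights may contain extra keys (e.g. ".kv_scale") that would raise
# KeyError on params_dict[name] without the guard.

def load_weights(params_dict, checkpoint_weights):
    loaded = []
    for name, tensor in checkpoint_weights:
        # Skip checkpoint tensors with no matching model parameter,
        # e.g. per-layer kv_scale entries in pre-quantized FP8 checkpoints.
        if name.endswith(".kv_scale") and name not in params_dict:
            continue
        param = params_dict[name]  # would raise KeyError without the guard
        param["data"] = tensor
        loaded.append(name)
    return loaded


model_params = {"layers.0.qkv.weight": {"data": None}}
ckpt = [
    ("layers.0.qkv.weight", [1.0, 2.0]),
    ("layers.0.attn.kv_scale", 0.5),  # extra key from an FP8 checkpoint
]
print(load_weights(model_params, ckpt))  # → ['layers.0.qkv.weight']
```

The same `name not in params_dict` pattern already guards GPTQ `.bias` keys a few lines earlier; this commit extends it to `.kv_scale`.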
