From 3c36559bdeb5f7305f21b9c3b23501b08bda2ffa Mon Sep 17 00:00:00 2001
From: Blueyo0 <30562758+blueyo0@users.noreply.github.com>
Date: Thu, 12 Sep 2024 12:53:12 +0800
Subject: [PATCH] [Gemma2] add bitsandbytes support for Gemma2 (#8338)

Signed-off-by: Alvant
---
 vllm/model_executor/models/gemma2.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/vllm/model_executor/models/gemma2.py b/vllm/model_executor/models/gemma2.py
index 90449ec51ef0b..f9d9f9e7567c8 100644
--- a/vllm/model_executor/models/gemma2.py
+++ b/vllm/model_executor/models/gemma2.py
@@ -312,6 +312,14 @@ class Gemma2ForCausalLM(nn.Module, SupportsLoRA):
     # Gemma does not apply LoRA to the embedding layer.
     embedding_modules = {}
     embedding_padding_modules = []
+    bitsandbytes_stacked_params_mapping = {
+        # shard_name, weight_name, index
+        "q_proj": ("qkv_proj", 0),
+        "k_proj": ("qkv_proj", 1),
+        "v_proj": ("qkv_proj", 2),
+        "gate_proj": ("gate_up_proj", 0),
+        "up_proj": ("gate_up_proj", 1),
+    }
 
     def __init__(
         self,
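
Note for reviewers: vLLM fuses the separate attention projections (q_proj,
k_proj, v_proj) into a single qkv_proj layer, and the MLP's gate_proj and
up_proj into gate_up_proj, whereas bitsandbytes checkpoints keep these
weights unfused. The mapping above tells the bitsandbytes weight loader
which shard index each unfused weight occupies inside the fused parameter,
which is what enables in-flight quantized loading for Gemma2. A minimal
usage sketch follows; it assumes vLLM's bitsandbytes path requires both the
quantization and load_format flags, and the model name is only an example:

    from vllm import LLM, SamplingParams

    # Load a Gemma2 checkpoint with in-flight bitsandbytes quantization.
    # Both flags are set to "bitsandbytes" so the bitsandbytes loader
    # (which consumes the stacked-params mapping above) is engaged.
    llm = LLM(
        model="google/gemma-2-2b-it",  # example; any Gemma2 checkpoint
        quantization="bitsandbytes",
        load_format="bitsandbytes",
    )

    outputs = llm.generate(
        ["Why is the sky blue?"],
        SamplingParams(temperature=0.0, max_tokens=32),
    )
    print(outputs[0].outputs[0].text)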