Support Rank Stabilized LoRA in the ModelConfig/LoraConfig #1877

Merged · 12 commits · Aug 6, 2024
11 changes: 11 additions & 0 deletions trl/trainer/model_config.py
@@ -64,6 +64,17 @@ class ModelConfig:
lora_task_type: str = field(
default="CAUSAL_LM", metadata={"help": "The task_type to pass for LoRA (use SEQ_CLS for reward modeling)"}
)
use_rslora: bool = field(
default=False,
metadata={
"help": (
"When set to True, uses <a href='https://doi.org/10.48550/arXiv.2312.03732'>Rank-Stabilized LoRA</a>"
" which sets the adapter scaling factor to `lora_alpha/math.sqrt(r)`, since it"
" was proven to work better. Otherwise, it will use the original default"
" value of `lora_alpha/r`."
)
},
)
load_in_8bit: bool = field(
default=False, metadata={"help": "use 8 bit precision for the base model - works only with LoRA"}
)
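The help text above states the only behavioral difference between the two modes: the adapter scaling factor is `lora_alpha/math.sqrt(r)` with rsLoRA instead of the default `lora_alpha/r`. A minimal sketch of the two formulas in plain Python, with illustrative hyperparameter values (not taken from this PR):

```python
import math

# Illustrative LoRA hyperparameters.
lora_alpha = 16
r = 64  # adapter rank

# Default LoRA scaling: alpha / r
default_scaling = lora_alpha / r            # 0.25

# Rank-Stabilized LoRA scaling: alpha / sqrt(r),
# which decays more slowly as the rank grows.
rslora_scaling = lora_alpha / math.sqrt(r)  # 2.0

print(default_scaling, rslora_scaling)
```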
1 change: 1 addition & 0 deletions trl/trainer/utils.py
@@ -791,6 +791,7 @@ def get_peft_config(model_config: ModelConfig) -> "Optional[PeftConfig]":
lora_dropout=model_config.lora_dropout,
bias="none",
task_type=model_config.lora_task_type,
use_rslora=model_config.use_rslora,
target_modules=model_config.lora_target_modules,
modules_to_save=model_config.lora_modules_to_save,
)
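With both pieces in place, enabling rsLoRA from user code is just a matter of setting the new flag on `ModelConfig`. A hedged usage sketch (the model name and the other field values are placeholders; it assumes `peft` is installed and that `ModelConfig` and `get_peft_config` are importable as shown in this diff):

```python
from trl import ModelConfig
from trl.trainer.utils import get_peft_config

# Illustrative values; `use_rslora` is the field added by this PR.
model_config = ModelConfig(
    model_name_or_path="facebook/opt-350m",  # placeholder model
    use_peft=True,        # get_peft_config returns None when PEFT is disabled
    lora_r=64,
    lora_alpha=16,
    use_rslora=True,      # scale adapters by lora_alpha / sqrt(r)
)

peft_config = get_peft_config(model_config)
print(peft_config.use_rslora)  # True
```

The flag is simply forwarded to `peft.LoraConfig`, so the actual rescaling happens inside PEFT when the adapter is created.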