Add support for FSDP+QLoRA and DeepSpeed ZeRO3+QLoRA (#1416)
* don't do mixed-precision casting

* don't use `prepare_model_for_kbit_training` when using FSDP+QLoRA or DeepSpeed ZeRO-3+QLoRA

* changes to enable FSDP+QLoRA and DeepSpeed ZeRO-3+QLoRA

* revert

* Update sft_trainer.py

* quality

* fix deprecation using changes from PR #1415

* fixes

* quality

* Update trl/trainer/sft_trainer.py

Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>

* quality

* relaunch tests

---------

Co-authored-by: Younes Belkada <49240599+younesbelkada@users.noreply.github.com>
pacman100 and younesbelkada committed Mar 13, 2024
1 parent 486e7a4 · commit 58c0888
Showing 1 changed file with 18 additions and 2 deletions: trl/trainer/sft_trainer.py
@@ -186,7 +186,18 @@ def __init__(
                     inspect.signature(prepare_model_for_kbit_training).parameters
                 )
                 gradient_checkpointing_kwargs = getattr(args, "gradient_checkpointing_kwargs", None) or {}
-                if getattr(model, "is_loaded_in_8bit", False) or getattr(model, "is_loaded_in_4bit", False):
+                is_sharded_qlora = False
+                # Below is to support QLoRA + FSDP / DS-Zero3 - one should never call
+                # peft_module_casting_to_bf16 or prepare_model_for_kbit_training when doing
+                # QLoRA + FSDP / DS-Zero3
+                if getattr(model, "is_loaded_in_4bit", False):
+                    for _, param in model.named_parameters():
+                        if param.__class__.__name__ == "Params4bit":
+                            is_sharded_qlora = param.data.device.type == "cpu"
+                            break
+                if getattr(model, "is_loaded_in_8bit", False) or (
+                    getattr(model, "is_loaded_in_4bit", False) and not is_sharded_qlora
+                ):
                     prepare_model_kwargs = {
                         "use_gradient_checkpointing": getattr(args, "gradient_checkpointing", False)
                     }
@@ -213,7 +224,12 @@ def make_inputs_require_grad(module, input, output):
                         model.get_input_embeddings().register_forward_hook(make_inputs_require_grad)
 
                 model = get_peft_model(model, peft_config)
-                if args is not None and args.bf16 and getattr(model, "is_loaded_in_4bit", False):
+                if (
+                    args is not None
+                    and args.bf16
+                    and getattr(model, "is_loaded_in_4bit", False)
+                    and not is_sharded_qlora
+                ):
                     peft_module_casting_to_bf16(model)
 
         if tokenizer is None:
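For readers skimming the diff, here is the detection heuristic pulled out as a standalone function. This is an illustrative sketch, not code from this commit (the name `is_sharded_qlora_model` is hypothetical): under FSDP or DeepSpeed ZeRO-3, the bitsandbytes `Params4bit` weights are still on CPU when `SFTTrainer.__init__` inspects the model, because sharding and device placement happen later, whereas plain single-device QLoRA has already moved them to the accelerator.

import torch


def is_sharded_qlora_model(model: torch.nn.Module) -> bool:
    """Mirror of the check in the diff above: detect a QLoRA model destined
    for FSDP / DeepSpeed ZeRO-3 sharding by looking at where the first
    bitsandbytes Params4bit tensor currently lives."""
    if not getattr(model, "is_loaded_in_4bit", False):
        return False
    for _, param in model.named_parameters():
        # bitsandbytes wraps 4-bit quantized weights in its Params4bit class
        if param.__class__.__name__ == "Params4bit":
            # Still on CPU at init time => weights will be sharded across
            # ranks later, so skip kbit preparation and bf16 casting
            return param.data.device.type == "cpu"
    return False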

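And a hedged end-to-end sketch of the code path this commit enables. Nothing below is from the commit itself: the model name is a placeholder, the hyperparameters are illustrative, and it assumes transformers/peft/bitsandbytes versions with FSDP+QLoRA support, launched via `accelerate launch` with an FSDP or DeepSpeed ZeRO-3 config. The key argument is `bnb_4bit_quant_storage`, which stores the quantized weights in a regular dtype so that FSDP/ZeRO-3 can shard them.

import torch
from datasets import Dataset
from peft import LoraConfig
from transformers import AutoModelForCausalLM, BitsAndBytesConfig
from trl import SFTTrainer

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    # Store quantized weights in a shardable dtype for FSDP / ZeRO-3
    bnb_4bit_quant_storage=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # placeholder model name
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)
peft_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    target_modules="all-linear",
    task_type="CAUSAL_LM",
)
train_dataset = Dataset.from_dict({"text": ["Hello world."] * 8})  # toy data

# With a sharded 4-bit model, SFTTrainer now skips prepare_model_for_kbit_training
# and peft_module_casting_to_bf16, per the change above.
trainer = SFTTrainer(
    model=model,
    train_dataset=train_dataset,
    dataset_text_field="text",
    peft_config=peft_config,
)
trainer.train()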