From 2cbc52a980a29eb2509d16d0a7fadbc1fd88777c Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 13 Oct 2023 16:01:03 +0000 Subject: [PATCH 01/25] adapt example scripts to use PEFT --- .../text_to_image/train_text_to_image_lora.py | 70 ++++++++----------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index eac0f18f49f4..e1e9798afb69 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -34,14 +34,14 @@ from datasets import load_dataset from huggingface_hub import create_repo, upload_folder from packaging import version +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict from torchvision import transforms from tqdm.auto import tqdm from transformers import CLIPTextModel, CLIPTokenizer import diffusers -from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, UNet2DConditionModel -from diffusers.loaders import AttnProcsLayers -from diffusers.models.attention_processor import LoRAAttnProcessor +from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, StableDiffusionPipeline, UNet2DConditionModel from diffusers.optimization import get_scheduler from diffusers.training_utils import compute_snr from diffusers.utils import check_min_version, is_wandb_available @@ -439,44 +439,19 @@ def main(): elif accelerator.mixed_precision == "bf16": weight_dtype = torch.bfloat16 + for param in unet.parameters(): + param.requires_grad_(False) + + unet_lora_config = LoraConfig( + r=args.rank, target_modules=["conv1", "conv2", "conv_shortcut", "proj_in", "proj_out"] + ) + # Move unet, vae and text_encoder to device and cast to weight_dtype unet.to(accelerator.device, dtype=weight_dtype) vae.to(accelerator.device, dtype=weight_dtype) text_encoder.to(accelerator.device, dtype=weight_dtype) - # now we will add new LoRA weights to the attention layers - # It's important to realize here how many attention weights will be added and of which sizes - # The sizes of the attention layers consist only of two different variables: - # 1) - the "hidden_size", which is increased according to `unet.config.block_out_channels`. - # 2) - the "cross attention size", which is set to `unet.config.cross_attention_dim`. - - # Let's first see how many attention processors we will have to set. 
- # For Stable Diffusion, it should be equal to: - # - down blocks (2x attention layers) * (2x transformer layers) * (3x down blocks) = 12 - # - mid blocks (2x attention layers) * (1x transformer layers) * (1x mid blocks) = 2 - # - up blocks (2x attention layers) * (3x transformer layers) * (3x down blocks) = 18 - # => 32 layers - - # Set correct lora layers - lora_attn_procs = {} - for name in unet.attn_processors.keys(): - cross_attention_dim = None if name.endswith("attn1.processor") else unet.config.cross_attention_dim - if name.startswith("mid_block"): - hidden_size = unet.config.block_out_channels[-1] - elif name.startswith("up_blocks"): - block_id = int(name[len("up_blocks.")]) - hidden_size = list(reversed(unet.config.block_out_channels))[block_id] - elif name.startswith("down_blocks"): - block_id = int(name[len("down_blocks.")]) - hidden_size = unet.config.block_out_channels[block_id] - - lora_attn_procs[name] = LoRAAttnProcessor( - hidden_size=hidden_size, - cross_attention_dim=cross_attention_dim, - rank=args.rank, - ) - - unet.set_attn_processor(lora_attn_procs) + unet.add_adapter(unet_lora_config) if args.enable_xformers_memory_efficient_attention: if is_xformers_available(): @@ -491,7 +466,7 @@ def main(): else: raise ValueError("xformers is not available. Make sure it is installed correctly") - lora_layers = AttnProcsLayers(unet.attn_processors) + lora_layers = filter(lambda p: p.requires_grad, unet.parameters()) # Enable TF32 for faster training on Ampere GPUs, # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices @@ -517,7 +492,7 @@ def main(): optimizer_cls = torch.optim.AdamW optimizer = optimizer_cls( - lora_layers.parameters(), + lora_layers, lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, @@ -777,7 +752,7 @@ def collate_fn(examples): # Backpropagate accelerator.backward(loss) if accelerator.sync_gradients: - params_to_clip = lora_layers.parameters() + params_to_clip = lora_layers accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm) optimizer.step() lr_scheduler.step() @@ -814,6 +789,15 @@ def collate_fn(examples): save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}") accelerator.save_state(save_path) + + unet_lora_state_dict = get_peft_model_state_dict(unet) + + StableDiffusionPipeline.save_lora_weights( + save_directory=save_path, + unet_lora_layers=unet_lora_state_dict, + safe_serialization=True, + ) + logger.info(f"Saved state to {save_path}") logs = {"step_loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]} @@ -869,7 +853,13 @@ def collate_fn(examples): accelerator.wait_for_everyone() if accelerator.is_main_process: unet = unet.to(torch.float32) - unet.save_attn_procs(args.output_dir) + + unet_lora_state_dict = get_peft_model_state_dict(unet) + DiffusionPipeline.save_pretrained( + args.output_dir, + unet_lora_layers=unet_lora_state_dict, + safe_serialization=True, + ) if args.push_to_hub: save_model_card( From b86543fe333fc3b759babe95fe1eb5181ac45cac Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Fri, 13 Oct 2023 18:03:52 +0200 Subject: [PATCH 02/25] Update examples/text_to_image/train_text_to_image_lora.py --- examples/text_to_image/train_text_to_image_lora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index e1e9798afb69..44cdb60a2ec6 100644 --- 
a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -439,6 +439,7 @@ def main(): elif accelerator.mixed_precision == "bf16": weight_dtype = torch.bfloat16 + # Freeze the unet parameters before adding adapters for param in unet.parameters(): param.requires_grad_(False) From af99c1258337eff263419d470a8aff15b21d8c38 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 16 Oct 2023 21:46:58 +0000 Subject: [PATCH 03/25] fix --- examples/text_to_image/train_text_to_image_lora.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index 44cdb60a2ec6..3462c74be4b6 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -856,8 +856,8 @@ def collate_fn(examples): unet = unet.to(torch.float32) unet_lora_state_dict = get_peft_model_state_dict(unet) - DiffusionPipeline.save_pretrained( - args.output_dir, + StableDiffusionPipeline.save_lora_weights( + save_directory=args.output_dir, unet_lora_layers=unet_lora_state_dict, safe_serialization=True, ) From 89d4bed7af29ee5fe824061e043465893319471b Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 23 Oct 2023 16:17:48 +0000 Subject: [PATCH 04/25] add for SDXL --- .../text_to_image/train_text_to_image_lora.py | 4 +- .../train_text_to_image_lora_sdxl.py | 70 ++++--------------- 2 files changed, 15 insertions(+), 59 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index 3462c74be4b6..d25aa6047dcb 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -443,9 +443,7 @@ def main(): for param in unet.parameters(): param.requires_grad_(False) - unet_lora_config = LoraConfig( - r=args.rank, target_modules=["conv1", "conv2", "conv_shortcut", "proj_in", "proj_out"] - ) + unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v"]) # Move unet, vae and text_encoder to device and cast to weight_dtype unet.to(accelerator.device, dtype=weight_dtype) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index 35de6eedcabd..a523a78fd171 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -41,6 +41,8 @@ from torchvision.transforms.functional import crop from tqdm.auto import tqdm from transformers import AutoTokenizer, PretrainedConfig +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict import diffusers from diffusers import ( @@ -609,53 +611,17 @@ def main(args): # now we will add new LoRA weights to the attention layers # Set correct lora layers - unet_lora_parameters = [] - for attn_processor_name, attn_processor in unet.attn_processors.items(): - # Parse the attention module. - attn_module = unet - for n in attn_processor_name.split(".")[:-1]: - attn_module = getattr(attn_module, n) - - # Set the `lora_layer` attribute of the attention-related matrices. 
- attn_module.to_q.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_q.in_features, out_features=attn_module.to_q.out_features, rank=args.rank - ) - ) - attn_module.to_k.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_k.in_features, out_features=attn_module.to_k.out_features, rank=args.rank - ) - ) - attn_module.to_v.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_v.in_features, out_features=attn_module.to_v.out_features, rank=args.rank - ) - ) - attn_module.to_out[0].set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_out[0].in_features, - out_features=attn_module.to_out[0].out_features, - rank=args.rank, - ) - ) + unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v"]) - # Accumulate the LoRA params to optimize. - unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) + unet.add_adapter(unet_lora_config) # The text encoder comes from πŸ€— transformers, so we cannot directly modify it. # So, instead, we monkey-patch the forward calls of its attention-blocks. if args.train_text_encoder: # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 - text_lora_parameters_one = LoraLoaderMixin._modify_text_encoder( - text_encoder_one, dtype=torch.float32, rank=args.rank - ) - text_lora_parameters_two = LoraLoaderMixin._modify_text_encoder( - text_encoder_two, dtype=torch.float32, rank=args.rank - ) + text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj"]) + text_encoder_one.add_adapter(text_lora_config) + text_encoder_two.add_adapter(text_lora_config) # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): @@ -743,11 +709,7 @@ def load_model_hook(models, input_dir): optimizer_class = torch.optim.AdamW # Optimizer creation - params_to_optimize = ( - itertools.chain(unet_lora_parameters, text_lora_parameters_one, text_lora_parameters_two) - if args.train_text_encoder - else unet_lora_parameters - ) + params_to_optimize = filter(lambda p: p.requires_grad, unet.parameters()) optimizer = optimizer_class( params_to_optimize, lr=args.learning_rate, @@ -1081,12 +1043,7 @@ def compute_time_ids(original_size, crops_coords_top_left): # Backpropagate accelerator.backward(loss) if accelerator.sync_gradients: - params_to_clip = ( - itertools.chain(unet_lora_parameters, text_lora_parameters_one, text_lora_parameters_two) - if args.train_text_encoder - else unet_lora_parameters - ) - accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm) + accelerator.clip_grad_norm_(params_to_optimize, args.max_grad_norm) optimizer.step() lr_scheduler.step() optimizer.zero_grad() @@ -1181,20 +1138,21 @@ def compute_time_ids(original_size, crops_coords_top_left): accelerator.wait_for_everyone() if accelerator.is_main_process: unet = accelerator.unwrap_model(unet) - unet_lora_layers = unet_attn_processors_state_dict(unet) + unet_lora_state_dict = get_peft_model_state_dict(unet) if args.train_text_encoder: text_encoder_one = accelerator.unwrap_model(text_encoder_one) text_encoder_lora_layers = text_encoder_lora_state_dict(text_encoder_one) - text_encoder_two = accelerator.unwrap_model(text_encoder_two) - text_encoder_2_lora_layers = 
text_encoder_lora_state_dict(text_encoder_two) + + text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_one) + text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_2_lora_layers) else: text_encoder_lora_layers = None text_encoder_2_lora_layers = None StableDiffusionXLPipeline.save_lora_weights( save_directory=args.output_dir, - unet_lora_layers=unet_lora_layers, + unet_lora_layers=unet_lora_state_dict, text_encoder_lora_layers=text_encoder_lora_layers, text_encoder_2_lora_layers=text_encoder_2_lora_layers, ) From 428191373483df37f0be4ef3c0a29bdfdc739bcb Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 23 Oct 2023 16:18:19 +0000 Subject: [PATCH 05/25] oops --- examples/text_to_image/train_text_to_image_lora_sdxl.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index a523a78fd171..0987d38b1c79 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -16,7 +16,6 @@ """Fine-tuning script for Stable Diffusion XL for text2image with support for LoRA.""" import argparse -import itertools import logging import math import os @@ -37,12 +36,12 @@ from datasets import load_dataset from huggingface_hub import create_repo, upload_folder from packaging import version +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict from torchvision import transforms from torchvision.transforms.functional import crop from tqdm.auto import tqdm from transformers import AutoTokenizer, PretrainedConfig -from peft import LoraConfig -from peft.utils import get_peft_model_state_dict import diffusers from diffusers import ( @@ -52,7 +51,6 @@ UNet2DConditionModel, ) from diffusers.loaders import LoraLoaderMixin, text_encoder_lora_state_dict -from diffusers.models.lora import LoRALinearLayer from diffusers.optimization import get_scheduler from diffusers.training_utils import compute_snr from diffusers.utils import check_min_version, is_wandb_available @@ -1145,7 +1143,7 @@ def compute_time_ids(original_size, crops_coords_top_left): text_encoder_lora_layers = text_encoder_lora_state_dict(text_encoder_one) text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_one) - text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_2_lora_layers) + text_encoder_2_lora_layers = get_peft_model_state_dict(text_encoder_two) else: text_encoder_lora_layers = None text_encoder_2_lora_layers = None From 6a48ad050d01d54f3f9e5ba66bcc7a2d6197a9e1 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Sun, 5 Nov 2023 07:25:37 +0000 Subject: [PATCH 06/25] make sure to install peft --- .github/workflows/pr_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index aaaea147f7ab..a75480b868b2 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -113,6 +113,7 @@ jobs: - name: Run example PyTorch CPU tests if: ${{ matrix.config.framework == 'pytorch_examples' }} run: | + python -m pip install peft python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \ --make-reports=tests_${{ matrix.config.report }} \ examples/test_examples.py From 069a929f95920228dc516d28b81c9fd45cf349c6 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Sun, 5 Nov 2023 07:41:47 +0000 Subject: [PATCH 07/25] fix --- examples/text_to_image/train_text_to_image_lora_sdxl.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index 466550dd1981..c3495a361059 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -1141,7 +1141,7 @@ def compute_time_ids(original_size, crops_coords_top_left): if args.train_text_encoder: text_encoder_one = accelerator.unwrap_model(text_encoder_one) - text_encoder_lora_layers = text_encoder_lora_state_dict(text_encoder_one) + text_encoder_two = accelerator.unwrap_model(text_encoder_two) text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_one) text_encoder_2_lora_layers = get_peft_model_state_dict(text_encoder_two) From e4b0f1dcc60fa9597721c3df1ad108886f0a84ac Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 13:38:27 +0000 Subject: [PATCH 08/25] fix --- examples/text_to_image/train_text_to_image_lora_sdxl.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index b9920ea8fcec..12ca4bef6000 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -51,7 +51,6 @@ UNet2DConditionModel, ) from diffusers.loaders import LoraLoaderMixin -from diffusers.models.lora import LoRALinearLayer, text_encoder_lora_state_dict from diffusers.optimization import get_scheduler from diffusers.training_utils import compute_snr from diffusers.utils import check_min_version, is_wandb_available @@ -619,7 +618,7 @@ def main(args): # So, instead, we monkey-patch the forward calls of its attention-blocks. 
if args.train_text_encoder: # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 - text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj"]) + text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) text_encoder_one.add_adapter(text_lora_config) text_encoder_two.add_adapter(text_lora_config) @@ -634,11 +633,11 @@ def save_model_hook(models, weights, output_dir): for model in models: if isinstance(model, type(accelerator.unwrap_model(unet))): - unet_lora_layers_to_save = unet_attn_processors_state_dict(model) + unet_lora_layers_to_save = get_peft_model_state_dict(model) elif isinstance(model, type(accelerator.unwrap_model(text_encoder_one))): - text_encoder_one_lora_layers_to_save = text_encoder_lora_state_dict(model) + text_encoder_one_lora_layers_to_save = get_peft_model_state_dict(model) elif isinstance(model, type(accelerator.unwrap_model(text_encoder_two))): - text_encoder_two_lora_layers_to_save = text_encoder_lora_state_dict(model) + text_encoder_two_lora_layers_to_save = get_peft_model_state_dict(model) else: raise ValueError(f"unexpected save model: {model.__class__}") From 62c33c0ba8c09bb6929a071fe3bffd490ccacaa2 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 13:50:52 +0000 Subject: [PATCH 09/25] fix dreambooth and lora --- examples/dreambooth/train_dreambooth_lora.py | 107 ++++-------------- .../train_text_to_image_lora_sdxl.py | 7 +- 2 files changed, 23 insertions(+), 91 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index d10e62ac8def..911a7023ac45 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -63,6 +63,9 @@ from diffusers.utils import check_min_version, is_wandb_available from diffusers.utils.import_utils import is_xformers_available +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict + # Will error if the minimal version of diffusers is not installed. Remove at your own risks. check_min_version("0.24.0.dev0") @@ -823,80 +826,16 @@ def main(args): if args.train_text_encoder: text_encoder.gradient_checkpointing_enable() - # now we will add new LoRA weights to the attention layers - # It's important to realize here how many attention weights will be added and of which sizes - # The sizes of the attention layers consist only of two different variables: - # 1) - the "hidden_size", which is increased according to `unet.config.block_out_channels`. - # 2) - the "cross attention size", which is set to `unet.config.cross_attention_dim`. - - # Let's first see how many attention processors we will have to set. - # For Stable Diffusion, it should be equal to: - # - down blocks (2x attention layers) * (2x transformer layers) * (3x down blocks) = 12 - # - mid blocks (2x attention layers) * (1x transformer layers) * (1x mid blocks) = 2 - # - up blocks (2x attention layers) * (3x transformer layers) * (3x up blocks) = 18 - # => 32 layers - - # Set correct lora layers - unet_lora_parameters = [] - for attn_processor_name, attn_processor in unet.attn_processors.items(): - # Parse the attention module. - attn_module = unet - for n in attn_processor_name.split(".")[:-1]: - attn_module = getattr(attn_module, n) - - # Set the `lora_layer` attribute of the attention-related matrices. 
- attn_module.to_q.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_q.in_features, out_features=attn_module.to_q.out_features, rank=args.rank - ) - ) - attn_module.to_k.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_k.in_features, out_features=attn_module.to_k.out_features, rank=args.rank - ) - ) - attn_module.to_v.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_v.in_features, out_features=attn_module.to_v.out_features, rank=args.rank - ) - ) - attn_module.to_out[0].set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_out[0].in_features, - out_features=attn_module.to_out[0].out_features, - rank=args.rank, - ) - ) - # Accumulate the LoRA params to optimize. - unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) - - if isinstance(attn_processor, (AttnAddedKVProcessor, SlicedAttnAddedKVProcessor, AttnAddedKVProcessor2_0)): - attn_module.add_k_proj.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.add_k_proj.in_features, - out_features=attn_module.add_k_proj.out_features, - rank=args.rank, - ) - ) - attn_module.add_v_proj.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.add_v_proj.in_features, - out_features=attn_module.add_v_proj.out_features, - rank=args.rank, - ) - ) - unet_lora_parameters.extend(attn_module.add_k_proj.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.add_v_proj.lora_layer.parameters()) + # now we will add new LoRA weights to the attention layers + unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v", "add_k_proj", "add_v_proj"]) + unet.add_adapter(unet_lora_config) - # The text encoder comes from πŸ€— transformers, so we cannot directly modify it. - # So, instead, we monkey-patch the forward calls of its attention-blocks. + # The text encoder comes from πŸ€— transformers, we will also attach adapters to it. 
if args.train_text_encoder: # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 - text_lora_parameters = LoraLoaderMixin._modify_text_encoder(text_encoder, dtype=torch.float32, rank=args.rank) + text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) + text_encoder.add_adapter(text_lora_config) # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): @@ -970,11 +909,10 @@ def load_model_hook(models, input_dir): optimizer_class = torch.optim.AdamW # Optimizer creation - params_to_optimize = ( - itertools.chain(unet_lora_parameters, text_lora_parameters) - if args.train_text_encoder - else unet_lora_parameters - ) + params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) + if args.train_text_encoder: + params_to_optimize = params_to_optimize + list(filter(lambda p: p.requires_grad, text_encoder.parameters())) + optimizer = optimizer_class( params_to_optimize, lr=args.learning_rate, @@ -1217,12 +1155,7 @@ def compute_text_embeddings(prompt): accelerator.backward(loss) if accelerator.sync_gradients: - params_to_clip = ( - itertools.chain(unet_lora_parameters, text_lora_parameters) - if args.train_text_encoder - else unet_lora_parameters - ) - accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm) + accelerator.clip_grad_norm_(params_to_optimize, args.max_grad_norm) optimizer.step() lr_scheduler.step() optimizer.zero_grad() @@ -1344,18 +1277,16 @@ def compute_text_embeddings(prompt): if accelerator.is_main_process: unet = accelerator.unwrap_model(unet) unet = unet.to(torch.float32) - unet_lora_layers = unet_lora_state_dict(unet) - if text_encoder is not None and args.train_text_encoder: + unet_lora_state_dict = get_peft_model_state_dict(unet) + + if args.train_text_encoder: text_encoder = accelerator.unwrap_model(text_encoder) - text_encoder = text_encoder.to(torch.float32) - text_encoder_lora_layers = text_encoder_lora_state_dict(text_encoder) - else: - text_encoder_lora_layers = None + text_encoder_lora_layers = get_peft_model_state_dict(text_encoder) LoraLoaderMixin.save_lora_weights( save_directory=args.output_dir, - unet_lora_layers=unet_lora_layers, + unet_lora_layers=unet_lora_state_dict, text_encoder_lora_layers=text_encoder_lora_layers, ) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index 12ca4bef6000..0e3f65842bb7 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -614,8 +614,7 @@ def main(args): unet.add_adapter(unet_lora_config) - # The text encoder comes from πŸ€— transformers, so we cannot directly modify it. - # So, instead, we monkey-patch the forward calls of its attention-blocks. + # The text encoder comes from πŸ€— transformers, we will also attach adapters to it. 
if args.train_text_encoder: # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) @@ -708,7 +707,9 @@ def load_model_hook(models, input_dir): optimizer_class = torch.optim.AdamW # Optimizer creation - params_to_optimize = filter(lambda p: p.requires_grad, unet.parameters()) + params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) + if args.train_text_encoder: + params_to_optimize = params_to_optimize + list(filter(lambda p: p.requires_grad, text_encoder_one.parameters())) + list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) optimizer = optimizer_class( params_to_optimize, lr=args.learning_rate, From a1e1cdffb9a41cb71dceeed7f32427814f42f2d8 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 13:51:53 +0000 Subject: [PATCH 10/25] more fixes --- examples/dreambooth/train_dreambooth_lora.py | 18 ++++-------------- .../train_text_to_image_lora_sdxl.py | 6 +++++- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index 911a7023ac45..e43a8135bba8 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -17,7 +17,6 @@ import copy import gc import hashlib -import itertools import logging import math import os @@ -35,6 +34,8 @@ from accelerate.utils import ProjectConfiguration, set_seed from huggingface_hub import create_repo, upload_folder from packaging import version +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict from PIL import Image from PIL.ImageOps import exif_transpose from torch.utils.data import Dataset @@ -52,20 +53,10 @@ UNet2DConditionModel, ) from diffusers.loaders import LoraLoaderMixin -from diffusers.models.attention_processor import ( - AttnAddedKVProcessor, - AttnAddedKVProcessor2_0, - SlicedAttnAddedKVProcessor, -) -from diffusers.models.lora import LoRALinearLayer, text_encoder_lora_state_dict from diffusers.optimization import get_scheduler -from diffusers.training_utils import unet_lora_state_dict from diffusers.utils import check_min_version, is_wandb_available from diffusers.utils.import_utils import is_xformers_available -from peft import LoraConfig -from peft.utils import get_peft_model_state_dict - # Will error if the minimal version of diffusers is not installed. Remove at your own risks. 
check_min_version("0.24.0.dev0") @@ -826,7 +817,6 @@ def main(args): if args.train_text_encoder: text_encoder.gradient_checkpointing_enable() - # now we will add new LoRA weights to the attention layers unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v", "add_k_proj", "add_v_proj"]) unet.add_adapter(unet_lora_config) @@ -847,9 +837,9 @@ def save_model_hook(models, weights, output_dir): for model in models: if isinstance(model, type(accelerator.unwrap_model(unet))): - unet_lora_layers_to_save = unet_lora_state_dict(model) + unet_lora_layers_to_save = get_peft_model_state_dict(model) elif isinstance(model, type(accelerator.unwrap_model(text_encoder))): - text_encoder_lora_layers_to_save = text_encoder_lora_state_dict(model) + text_encoder_lora_layers_to_save = get_peft_model_state_dict(model) else: raise ValueError(f"unexpected save model: {model.__class__}") diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index 0e3f65842bb7..d6485ef3dda9 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -709,7 +709,11 @@ def load_model_hook(models, input_dir): # Optimizer creation params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) if args.train_text_encoder: - params_to_optimize = params_to_optimize + list(filter(lambda p: p.requires_grad, text_encoder_one.parameters())) + list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) + params_to_optimize = ( + params_to_optimize + + list(filter(lambda p: p.requires_grad, text_encoder_one.parameters())) + + list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) + ) optimizer = optimizer_class( params_to_optimize, lr=args.learning_rate, From c3d3002d3ac9dc32da7303631b1ee9811eb2503f Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 13:52:44 +0000 Subject: [PATCH 11/25] add peft to requirements.txt --- examples/dreambooth/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/dreambooth/requirements.txt b/examples/dreambooth/requirements.txt index 7a612982f4ab..75bf0a4a7920 100644 --- a/examples/dreambooth/requirements.txt +++ b/examples/dreambooth/requirements.txt @@ -4,3 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 +peft \ No newline at end of file From 340150b1010dd4e2ee7ff7e7f52a5ef48047927d Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 14:20:35 +0000 Subject: [PATCH 12/25] fix --- examples/dreambooth/train_dreambooth_lora.py | 2 + .../dreambooth/train_dreambooth_lora_sdxl.py | 85 +++++-------------- 2 files changed, 22 insertions(+), 65 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index e43a8135bba8..809e769d5d1a 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -1273,6 +1273,8 @@ def compute_text_embeddings(prompt): if args.train_text_encoder: text_encoder = accelerator.unwrap_model(text_encoder) text_encoder_lora_layers = get_peft_model_state_dict(text_encoder) + else: + text_encoder_lora_layers = None LoraLoaderMixin.save_lora_weights( save_directory=args.output_dir, diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index ef2020398b2d..dbc4babb85d5 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ 
b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -16,7 +16,6 @@ import argparse import gc import hashlib -import itertools import logging import math import os @@ -34,6 +33,8 @@ from accelerate.utils import DistributedDataParallelKwargs, ProjectConfiguration, set_seed from huggingface_hub import create_repo, upload_folder from packaging import version +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict from PIL import Image from PIL.ImageOps import exif_transpose from torch.utils.data import Dataset @@ -50,9 +51,7 @@ UNet2DConditionModel, ) from diffusers.loaders import LoraLoaderMixin -from diffusers.models.lora import LoRALinearLayer, text_encoder_lora_state_dict from diffusers.optimization import get_scheduler -from diffusers.training_utils import unet_lora_state_dict from diffusers.utils import check_min_version, is_wandb_available from diffusers.utils.import_utils import is_xformers_available @@ -745,54 +744,15 @@ def main(args): text_encoder_two.gradient_checkpointing_enable() # now we will add new LoRA weights to the attention layers - # Set correct lora layers - unet_lora_parameters = [] - for attn_processor_name, attn_processor in unet.attn_processors.items(): - # Parse the attention module. - attn_module = unet - for n in attn_processor_name.split(".")[:-1]: - attn_module = getattr(attn_module, n) - - # Set the `lora_layer` attribute of the attention-related matrices. - attn_module.to_q.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_q.in_features, out_features=attn_module.to_q.out_features, rank=args.rank - ) - ) - attn_module.to_k.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_k.in_features, out_features=attn_module.to_k.out_features, rank=args.rank - ) - ) - attn_module.to_v.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_v.in_features, out_features=attn_module.to_v.out_features, rank=args.rank - ) - ) - attn_module.to_out[0].set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_out[0].in_features, - out_features=attn_module.to_out[0].out_features, - rank=args.rank, - ) - ) - - # Accumulate the LoRA params to optimize. - unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) + unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v", "to_out.0"]) + unet.add_adapter(unet_lora_config) # The text encoder comes from πŸ€— transformers, so we cannot directly modify it. # So, instead, we monkey-patch the forward calls of its attention-blocks. 
if args.train_text_encoder: - # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 - text_lora_parameters_one = LoraLoaderMixin._modify_text_encoder( - text_encoder_one, dtype=torch.float32, rank=args.rank - ) - text_lora_parameters_two = LoraLoaderMixin._modify_text_encoder( - text_encoder_two, dtype=torch.float32, rank=args.rank - ) + text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) + text_encoder_one.add_adapter(text_lora_config) + text_encoder_two.add_adapter(text_lora_config) # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): @@ -805,11 +765,11 @@ def save_model_hook(models, weights, output_dir): for model in models: if isinstance(model, type(accelerator.unwrap_model(unet))): - unet_lora_layers_to_save = unet_lora_state_dict(model) + unet_lora_layers_to_save = get_peft_model_state_dict(model) elif isinstance(model, type(accelerator.unwrap_model(text_encoder_one))): - text_encoder_one_lora_layers_to_save = text_encoder_lora_state_dict(model) + text_encoder_one_lora_layers_to_save = get_peft_model_state_dict(model) elif isinstance(model, type(accelerator.unwrap_model(text_encoder_two))): - text_encoder_two_lora_layers_to_save = text_encoder_lora_state_dict(model) + text_encoder_two_lora_layers_to_save = get_peft_model_state_dict(model) else: raise ValueError(f"unexpected save model: {model.__class__}") @@ -879,12 +839,12 @@ def load_model_hook(models, input_dir): else: optimizer_class = torch.optim.AdamW - # Optimizer creation - params_to_optimize = ( - itertools.chain(unet_lora_parameters, text_lora_parameters_one, text_lora_parameters_two) - if args.train_text_encoder - else unet_lora_parameters - ) + params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) + if args.train_text_encoder: + params_to_optimize = params_to_optimize + list( + filter(lambda p: p.requires_grad, [text_encoder_one.parameters(), text_encoder_two.parameters()]) + ) + optimizer = optimizer_class( params_to_optimize, lr=args.learning_rate, @@ -1155,12 +1115,7 @@ def compute_text_embeddings(prompt, text_encoders, tokenizers): accelerator.backward(loss) if accelerator.sync_gradients: - params_to_clip = ( - itertools.chain(unet_lora_parameters, text_lora_parameters_one, text_lora_parameters_two) - if args.train_text_encoder - else unet_lora_parameters - ) - accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm) + accelerator.clip_grad_norm_(params_to_optimize, args.max_grad_norm) optimizer.step() lr_scheduler.step() optimizer.zero_grad() @@ -1277,13 +1232,13 @@ def compute_text_embeddings(prompt, text_encoders, tokenizers): if accelerator.is_main_process: unet = accelerator.unwrap_model(unet) unet = unet.to(torch.float32) - unet_lora_layers = unet_lora_state_dict(unet) + unet_lora_layers = get_peft_model_state_dict(unet) if args.train_text_encoder: text_encoder_one = accelerator.unwrap_model(text_encoder_one) - text_encoder_lora_layers = text_encoder_lora_state_dict(text_encoder_one.to(torch.float32)) + text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_one.to(torch.float32)) text_encoder_two = accelerator.unwrap_model(text_encoder_two) - text_encoder_2_lora_layers = text_encoder_lora_state_dict(text_encoder_two.to(torch.float32)) + text_encoder_2_lora_layers = get_peft_model_state_dict(text_encoder_two.to(torch.float32)) else: text_encoder_lora_layers = None 
text_encoder_2_lora_layers = None From dff2995fd0f7f9c25076efd77f3a81ba3d6a9b57 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 14:42:39 +0000 Subject: [PATCH 13/25] final fix --- examples/dreambooth/train_dreambooth_lora_sdxl.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index dbc4babb85d5..6eec58080741 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -841,8 +841,10 @@ def load_model_hook(models, input_dir): params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) if args.train_text_encoder: - params_to_optimize = params_to_optimize + list( - filter(lambda p: p.requires_grad, [text_encoder_one.parameters(), text_encoder_two.parameters()]) + params_to_optimize = ( + params_to_optimize + + list(filter(lambda p: p.requires_grad, text_encoder_one.parameters())) + + list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) ) optimizer = optimizer_class( From 978d0cd6c6df998065b17ef949689fe6741cd3dc Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 16 Nov 2023 08:38:44 +0000 Subject: [PATCH 14/25] add peft version in requirements --- examples/dreambooth/requirements.txt | 2 +- examples/text_to_image/requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/requirements.txt b/examples/dreambooth/requirements.txt index 75bf0a4a7920..bf5ce39b8682 100644 --- a/examples/dreambooth/requirements.txt +++ b/examples/dreambooth/requirements.txt @@ -4,4 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 -peft \ No newline at end of file +peft>=0.6.0 \ No newline at end of file diff --git a/examples/text_to_image/requirements.txt b/examples/text_to_image/requirements.txt index 31b9026efdc2..6eb46d193dae 100644 --- a/examples/text_to_image/requirements.txt +++ b/examples/text_to_image/requirements.txt @@ -5,3 +5,4 @@ datasets ftfy tensorboard Jinja2 +peft>=0.6.0 \ No newline at end of file From f17140475d9f5c74b7e49e10bcedcd1fb6cb629b Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 16 Nov 2023 08:39:30 +0000 Subject: [PATCH 15/25] remove comment --- examples/dreambooth/train_dreambooth_lora.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index 809e769d5d1a..7cef96ccbafe 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -823,7 +823,6 @@ def main(args): # The text encoder comes from πŸ€— transformers, we will also attach adapters to it. 
if args.train_text_encoder: - # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) text_encoder.add_adapter(text_lora_config) From a2f3f202bb5ea859dc36be02b244497bdb4610d0 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 16 Nov 2023 08:40:36 +0000 Subject: [PATCH 16/25] change variable names --- examples/dreambooth/train_dreambooth_lora.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index 7cef96ccbafe..e8f511cae601 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -1271,14 +1271,14 @@ def compute_text_embeddings(prompt): if args.train_text_encoder: text_encoder = accelerator.unwrap_model(text_encoder) - text_encoder_lora_layers = get_peft_model_state_dict(text_encoder) + text_encoder_state_dict = get_peft_model_state_dict(text_encoder) else: - text_encoder_lora_layers = None + text_encoder_state_dict = None LoraLoaderMixin.save_lora_weights( save_directory=args.output_dir, unet_lora_layers=unet_lora_state_dict, - text_encoder_lora_layers=text_encoder_lora_layers, + text_encoder_lora_layers=text_encoder_state_dict, ) # Final inference From 14b0dd2e38c5cb87c3a30799465ece6e43fcadc0 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 16 Nov 2023 08:42:52 +0000 Subject: [PATCH 17/25] add few lines in readme --- examples/dreambooth/README.md | 1 + examples/dreambooth/README_sdxl.md | 1 + examples/text_to_image/README.md | 2 ++ examples/text_to_image/README_sdxl.md | 1 + 4 files changed, 5 insertions(+) diff --git a/examples/dreambooth/README.md b/examples/dreambooth/README.md index 0579e337939d..972fe6e8cffb 100644 --- a/examples/dreambooth/README.md +++ b/examples/dreambooth/README.md @@ -44,6 +44,7 @@ write_basic_config() ``` When running `accelerate config`, if we specify torch compile mode to True there can be dramatic speedups. +Note also that we use PEFT library as backend for LoRA training, make sure to have `peft>=0.6.0` installed in your environment. ### Dog toy example diff --git a/examples/dreambooth/README_sdxl.md b/examples/dreambooth/README_sdxl.md index d78d1ef5d2dd..66232d3063f5 100644 --- a/examples/dreambooth/README_sdxl.md +++ b/examples/dreambooth/README_sdxl.md @@ -47,6 +47,7 @@ write_basic_config() ``` When running `accelerate config`, if we specify torch compile mode to True there can be dramatic speedups. +Note also that we use PEFT library as backend for LoRA training, make sure to have `peft>=0.6.0` installed in your environment. ### Dog toy example diff --git a/examples/text_to_image/README.md b/examples/text_to_image/README.md index 7b9f4013c746..e2cbaca2a9d8 100644 --- a/examples/text_to_image/README.md +++ b/examples/text_to_image/README.md @@ -32,6 +32,8 @@ And initialize an [πŸ€—Accelerate](https://github.com/huggingface/accelerate/) e accelerate config ``` +Note also that we use PEFT library as backend for LoRA training, make sure to have `peft>=0.6.0` installed in your environment. + ### Pokemon example You need to accept the model license before downloading or using the weights. In this example we'll use model version `v1-4`, so you'll need to visit [its card](https://huggingface.co/CompVis/stable-diffusion-v1-4), read the license and tick the checkbox if you agree. 
diff --git a/examples/text_to_image/README_sdxl.md b/examples/text_to_image/README_sdxl.md index 75c9cb126472..1278185ddf1f 100644 --- a/examples/text_to_image/README_sdxl.md +++ b/examples/text_to_image/README_sdxl.md @@ -45,6 +45,7 @@ write_basic_config() ``` When running `accelerate config`, if we specify torch compile mode to True there can be dramatic speedups. +Note also that we use PEFT library as backend for LoRA training, make sure to have `peft>=0.6.0` installed in your environment. ### Training From b21064f68ffad648455da116ba4b6bb669d1a223 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 17 Nov 2023 13:32:57 +0000 Subject: [PATCH 18/25] add to reqs --- examples/dreambooth/requirements_sdxl.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/dreambooth/requirements_sdxl.txt b/examples/dreambooth/requirements_sdxl.txt index 7a612982f4ab..bf5ce39b8682 100644 --- a/examples/dreambooth/requirements_sdxl.txt +++ b/examples/dreambooth/requirements_sdxl.txt @@ -4,3 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 +peft>=0.6.0 \ No newline at end of file From b4e108b103951cca3624124e61c64eb13b57a518 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 20 Nov 2023 14:14:54 +0000 Subject: [PATCH 19/25] style --- examples/dreambooth/train_dreambooth_lora.py | 2 -- examples/dreambooth/train_dreambooth_lora_sdxl.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index b39c800ac72a..537f79e2f1fa 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -16,8 +16,6 @@ import argparse import copy import gc -import hashlib -import itertools import logging import math import os diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index 9abc2c9192c9..5dbffedd0014 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -15,8 +15,6 @@ import argparse import gc -import hashlib -import itertools import logging import math import os From 75c3948b649598f7005133b05d9b2f6895657abf Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 23 Nov 2023 15:04:20 +0000 Subject: [PATCH 20/25] fix issues --- examples/dreambooth/train_dreambooth_lora_sdxl.py | 9 ++++++++- examples/text_to_image/train_text_to_image_lora.py | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index b8238d0a128b..a9b3fc91abcc 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -15,6 +15,7 @@ import argparse import gc +import itertools import logging import math import os @@ -51,8 +52,8 @@ UNet2DConditionModel, ) from diffusers.loaders import LoraLoaderMixin - from diffusers.optimization import get_scheduler +from diffusers.training_utils import compute_snr from diffusers.utils import check_min_version, is_wandb_available from diffusers.utils.import_utils import is_xformers_available @@ -1073,6 +1074,12 @@ def load_model_hook(models, input_dir): args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes ) + unet_lora_parameters = list(filter(lambda p: p.requires_grad, unet.parameters())) + + if args.train_text_encoder: + text_lora_parameters_one = list(filter(lambda p: p.requires_grad, 
text_encoder_one.parameters())) + text_lora_parameters_two = list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) + # Optimization parameters unet_lora_parameters_with_lr = {"params": unet_lora_parameters, "lr": args.learning_rate} if args.train_text_encoder: diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index 32faa69998ea..6b1e0033e0db 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -651,8 +651,8 @@ def collate_fn(examples): ) # Prepare everything with our `accelerator`. - unet_lora_parameters, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( - unet_lora_parameters, optimizer, train_dataloader, lr_scheduler + unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + unet, optimizer, train_dataloader, lr_scheduler ) # We need to recalculate our total training steps as the size of the training dataloader may have changed. From 1e94c4b7f617a596df37bfa6110efeb004aa9b0e Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 23 Nov 2023 15:47:46 +0000 Subject: [PATCH 21/25] fix lora dreambooth xl tests --- .../dreambooth/train_dreambooth_lora_sdxl.py | 33 +++++++++---------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index a9b3fc91abcc..4fa7bdc21d34 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -1144,25 +1144,17 @@ def load_model_hook(models, input_dir): optimizer_class = prodigyopt.Prodigy - params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) - if args.train_text_encoder: - params_to_optimize = ( - params_to_optimize - + list(filter(lambda p: p.requires_grad, text_encoder_one.parameters())) - + list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) + optimizer = optimizer_class( + params_to_optimize, + lr=args.learning_rate, + betas=(args.adam_beta1, args.adam_beta2), + weight_decay=args.adam_weight_decay, + eps=args.adam_epsilon, + decouple=args.prodigy_decouple, + use_bias_correction=args.prodigy_use_bias_correction, + safeguard_warmup=args.prodigy_safeguard_warmup, ) - optimizer = optimizer_class( - params_to_optimize, - lr=args.learning_rate, - betas=(args.adam_beta1, args.adam_beta2), - weight_decay=args.adam_weight_decay, - eps=args.adam_epsilon, - decouple=args.prodigy_decouple, - use_bias_correction=args.prodigy_use_bias_correction, - safeguard_warmup=args.prodigy_safeguard_warmup, - ) - # Dataset and DataLoaders creation: train_dataset = DreamBoothDataset( instance_data_root=args.instance_data_dir, @@ -1472,7 +1464,12 @@ def compute_text_embeddings(prompt, text_encoders, tokenizers): accelerator.backward(loss) if accelerator.sync_gradients: - accelerator.clip_grad_norm_(params_to_optimize, args.max_grad_norm) + params_to_clip = ( + itertools.chain(unet_lora_parameters, text_lora_parameters_one, text_lora_parameters_two) + if args.train_text_encoder + else unet_lora_parameters + ) + accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm) optimizer.step() lr_scheduler.step() optimizer.zero_grad() From ada6ad896ed92bc27bf1ca8163c9b1a012bfb1b8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 29 Nov 2023 07:44:41 +0530 Subject: [PATCH 22/25] init_lora_weights to gaussian and add out proj where missing --- 
examples/dreambooth/train_dreambooth_lora.py | 10 ++++++++-- examples/dreambooth/train_dreambooth_lora_sdxl.py | 8 ++++++-- examples/text_to_image/train_text_to_image_lora.py | 4 +++- .../text_to_image/train_text_to_image_lora_sdxl.py | 8 ++++++-- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index 1962f9cf4dfd..b96cb01b442e 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -858,12 +858,18 @@ def main(args): text_encoder.gradient_checkpointing_enable() # now we will add new LoRA weights to the attention layers - unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v", "add_k_proj", "add_v_proj"]) + unet_lora_config = LoraConfig( + r=args.rank, + init_lora_weights="gaussian", + target_modules=["to_k", "to_q", "to_v", "to_out.0", "add_k_proj", "add_v_proj"], + ) unet.add_adapter(unet_lora_config) # The text encoder comes from πŸ€— transformers, we will also attach adapters to it. if args.train_text_encoder: - text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) + text_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["q_proj", "k_proj", "v_proj", "out_proj"] + ) text_encoder.add_adapter(text_lora_config) # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index 529eec1c53b2..6a5c1ca9a642 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -1010,13 +1010,17 @@ def main(args): text_encoder_two.gradient_checkpointing_enable() # now we will add new LoRA weights to the attention layers - unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v", "to_out.0"]) + unet_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["to_k", "to_q", "to_v", "to_out.0"] + ) unet.add_adapter(unet_lora_config) # The text encoder comes from πŸ€— transformers, so we cannot directly modify it. # So, instead, we monkey-patch the forward calls of its attention-blocks. 
if args.train_text_encoder: - text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) + text_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["q_proj", "k_proj", "v_proj", "out_proj"] + ) text_encoder_one.add_adapter(text_lora_config) text_encoder_two.add_adapter(text_lora_config) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index 47eb0abd5bf3..d90441ff4de9 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -484,7 +484,9 @@ def main(): for param in unet.parameters(): param.requires_grad_(False) - unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v"]) + unet_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["to_k", "to_q", "to_v", "to_out.0"] + ) # Move unet, vae and text_encoder to device and cast to weight_dtype unet.to(accelerator.device, dtype=weight_dtype) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index 7f76057e9973..d025d6548cc5 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -658,14 +658,18 @@ def main(args): # now we will add new LoRA weights to the attention layers # Set correct lora layers - unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v"]) + unet_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["to_k", "to_q", "to_v", "to_out.0"] + ) unet.add_adapter(unet_lora_config) # The text encoder comes from πŸ€— transformers, we will also attach adapters to it. if args.train_text_encoder: # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 - text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) + text_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["q_proj", "k_proj", "v_proj", "out_proj"] + ) text_encoder_one.add_adapter(text_lora_config) text_encoder_two.add_adapter(text_lora_config) From 252dcdac0537aee30b0a56db18bbcca7a927a72e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 29 Nov 2023 07:59:05 +0530 Subject: [PATCH 23/25] ammend requirements. 
--- examples/dreambooth/requirements_sdxl.txt | 2 +- examples/text_to_image/requirements_sdxl.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/requirements_sdxl.txt b/examples/dreambooth/requirements_sdxl.txt index bf5ce39b8682..2f7b5060971a 100644 --- a/examples/dreambooth/requirements_sdxl.txt +++ b/examples/dreambooth/requirements_sdxl.txt @@ -4,4 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 -peft>=0.6.0 \ No newline at end of file +peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file diff --git a/examples/text_to_image/requirements_sdxl.txt b/examples/text_to_image/requirements_sdxl.txt index cdd3336e3617..476e1d873d27 100644 --- a/examples/text_to_image/requirements_sdxl.txt +++ b/examples/text_to_image/requirements_sdxl.txt @@ -5,3 +5,4 @@ ftfy tensorboard Jinja2 datasets +peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file From 90b760a61e08250e8e8ed3b53ce708f2fff4096f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 29 Nov 2023 08:02:32 +0530 Subject: [PATCH 24/25] ammend requirements.txt --- examples/dreambooth/requirements.txt | 2 +- examples/text_to_image/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/dreambooth/requirements.txt b/examples/dreambooth/requirements.txt index bf5ce39b8682..2f7b5060971a 100644 --- a/examples/dreambooth/requirements.txt +++ b/examples/dreambooth/requirements.txt @@ -4,4 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 -peft>=0.6.0 \ No newline at end of file +peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file diff --git a/examples/text_to_image/requirements.txt b/examples/text_to_image/requirements.txt index 6eb46d193dae..9394ad3354c6 100644 --- a/examples/text_to_image/requirements.txt +++ b/examples/text_to_image/requirements.txt @@ -5,4 +5,4 @@ datasets ftfy tensorboard Jinja2 -peft>=0.6.0 \ No newline at end of file +peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file From 57516edb416bcbe13d61211d660773d334cbe5aa Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 6 Dec 2023 16:27:36 +0000 Subject: [PATCH 25/25] add correct peft versions --- examples/dreambooth/requirements.txt | 2 +- examples/dreambooth/requirements_sdxl.txt | 2 +- examples/text_to_image/requirements.txt | 2 +- examples/text_to_image/requirements_sdxl.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/dreambooth/requirements.txt b/examples/dreambooth/requirements.txt index 2f7b5060971a..3f86855e1d1e 100644 --- a/examples/dreambooth/requirements.txt +++ b/examples/dreambooth/requirements.txt @@ -4,4 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 -peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file +peft==0.7.0 \ No newline at end of file diff --git a/examples/dreambooth/requirements_sdxl.txt b/examples/dreambooth/requirements_sdxl.txt index 2f7b5060971a..3f86855e1d1e 100644 --- a/examples/dreambooth/requirements_sdxl.txt +++ b/examples/dreambooth/requirements_sdxl.txt @@ -4,4 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 -peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file +peft==0.7.0 \ No newline at end of file diff --git a/examples/text_to_image/requirements.txt b/examples/text_to_image/requirements.txt index 9394ad3354c6..0dd164fc2035 100644 --- a/examples/text_to_image/requirements.txt +++ b/examples/text_to_image/requirements.txt @@ -5,4 +5,4 @@ datasets ftfy 
tensorboard Jinja2 -peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file +peft==0.7.0 \ No newline at end of file diff --git a/examples/text_to_image/requirements_sdxl.txt b/examples/text_to_image/requirements_sdxl.txt index 476e1d873d27..64cbc9205fd0 100644 --- a/examples/text_to_image/requirements_sdxl.txt +++ b/examples/text_to_image/requirements_sdxl.txt @@ -5,4 +5,4 @@ ftfy tensorboard Jinja2 datasets -peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file +peft==0.7.0 \ No newline at end of file
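
Taken together, the patches above replace the hand-rolled `LoRAAttnProcessor`/`LoRALinearLayer` plumbing in the example scripts with PEFT adapters: the base model is frozen, a `LoraConfig` targeting the attention projections is attached with `add_adapter`, only the parameters left with `requires_grad=True` are optimized and clipped, and the trained weights are exported with `get_peft_model_state_dict` and saved through `save_lora_weights`. The sketch below condenses that flow outside the full training scripts; the model ID, rank, learning rate, and output directory are illustrative placeholders, and the denoising training loop itself is omitted.

# Minimal sketch of the PEFT-backed LoRA flow these patches introduce.
# MODEL_ID, RANK, OUTPUT_DIR, and the learning rate are placeholders,
# not values taken from the patches.
import torch
from peft import LoraConfig
from peft.utils import get_peft_model_state_dict

from diffusers import StableDiffusionPipeline, UNet2DConditionModel

MODEL_ID = "runwayml/stable-diffusion-v1-5"  # placeholder base model
RANK = 4                                     # placeholder LoRA rank
OUTPUT_DIR = "sd-lora-output"                # placeholder save directory

# 1. Load the UNet and freeze all of its base parameters.
unet = UNet2DConditionModel.from_pretrained(MODEL_ID, subfolder="unet")
for param in unet.parameters():
    param.requires_grad_(False)

# 2. Attach a LoRA adapter to the attention projections, as the patches do.
unet_lora_config = LoraConfig(
    r=RANK,
    init_lora_weights="gaussian",
    target_modules=["to_k", "to_q", "to_v", "to_out.0"],
)
unet.add_adapter(unet_lora_config)

# 3. Optimize only the parameters the adapter made trainable.
lora_layers = list(filter(lambda p: p.requires_grad, unet.parameters()))
optimizer = torch.optim.AdamW(lora_layers, lr=1e-4)

# ... the usual denoising training loop goes here (omitted in this sketch);
# gradient clipping would be applied to the same `lora_layers` list.

# 4. Export only the LoRA weights and save them in the pipeline format.
unet_lora_state_dict = get_peft_model_state_dict(unet)
StableDiffusionPipeline.save_lora_weights(
    save_directory=OUTPUT_DIR,
    unet_lora_layers=unet_lora_state_dict,
    safe_serialization=True,
)

# 5. At inference time the saved weights load back through the pipeline.
pipe = StableDiffusionPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.float16)
pipe.load_lora_weights(OUTPUT_DIR)

Compared with the removed per-module `set_lora_layer` calls, this leaves the example scripts with only a rank and a list of target module names to manage, while PEFT owns the adapter bookkeeping and state-dict extraction.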