From 2cbc52a980a29eb2509d16d0a7fadbc1fd88777c Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 13 Oct 2023 16:01:03 +0000 Subject: [PATCH 01/25] adapt example scripts to use PEFT --- .../text_to_image/train_text_to_image_lora.py | 70 ++++++++----------- 1 file changed, 30 insertions(+), 40 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index eac0f18f49f4..e1e9798afb69 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -34,14 +34,14 @@ from datasets import load_dataset from huggingface_hub import create_repo, upload_folder from packaging import version +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict from torchvision import transforms from tqdm.auto import tqdm from transformers import CLIPTextModel, CLIPTokenizer import diffusers -from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, UNet2DConditionModel -from diffusers.loaders import AttnProcsLayers -from diffusers.models.attention_processor import LoRAAttnProcessor +from diffusers import AutoencoderKL, DDPMScheduler, DiffusionPipeline, StableDiffusionPipeline, UNet2DConditionModel from diffusers.optimization import get_scheduler from diffusers.training_utils import compute_snr from diffusers.utils import check_min_version, is_wandb_available @@ -439,44 +439,19 @@ def main(): elif accelerator.mixed_precision == "bf16": weight_dtype = torch.bfloat16 + for param in unet.parameters(): + param.requires_grad_(False) + + unet_lora_config = LoraConfig( + r=args.rank, target_modules=["conv1", "conv2", "conv_shortcut", "proj_in", "proj_out"] + ) + # Move unet, vae and text_encoder to device and cast to weight_dtype unet.to(accelerator.device, dtype=weight_dtype) vae.to(accelerator.device, dtype=weight_dtype) text_encoder.to(accelerator.device, dtype=weight_dtype) - # now we will add new LoRA weights to the attention layers - # It's important to realize here how many attention weights will be added and of which sizes - # The sizes of the attention layers consist only of two different variables: - # 1) - the "hidden_size", which is increased according to `unet.config.block_out_channels`. - # 2) - the "cross attention size", which is set to `unet.config.cross_attention_dim`. - - # Let's first see how many attention processors we will have to set. 
- # For Stable Diffusion, it should be equal to: - # - down blocks (2x attention layers) * (2x transformer layers) * (3x down blocks) = 12 - # - mid blocks (2x attention layers) * (1x transformer layers) * (1x mid blocks) = 2 - # - up blocks (2x attention layers) * (3x transformer layers) * (3x down blocks) = 18 - # => 32 layers - - # Set correct lora layers - lora_attn_procs = {} - for name in unet.attn_processors.keys(): - cross_attention_dim = None if name.endswith("attn1.processor") else unet.config.cross_attention_dim - if name.startswith("mid_block"): - hidden_size = unet.config.block_out_channels[-1] - elif name.startswith("up_blocks"): - block_id = int(name[len("up_blocks.")]) - hidden_size = list(reversed(unet.config.block_out_channels))[block_id] - elif name.startswith("down_blocks"): - block_id = int(name[len("down_blocks.")]) - hidden_size = unet.config.block_out_channels[block_id] - - lora_attn_procs[name] = LoRAAttnProcessor( - hidden_size=hidden_size, - cross_attention_dim=cross_attention_dim, - rank=args.rank, - ) - - unet.set_attn_processor(lora_attn_procs) + unet.add_adapter(unet_lora_config) if args.enable_xformers_memory_efficient_attention: if is_xformers_available(): @@ -491,7 +466,7 @@ def main(): else: raise ValueError("xformers is not available. Make sure it is installed correctly") - lora_layers = AttnProcsLayers(unet.attn_processors) + lora_layers = filter(lambda p: p.requires_grad, unet.parameters()) # Enable TF32 for faster training on Ampere GPUs, # cf https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices @@ -517,7 +492,7 @@ def main(): optimizer_cls = torch.optim.AdamW optimizer = optimizer_cls( - lora_layers.parameters(), + lora_layers, lr=args.learning_rate, betas=(args.adam_beta1, args.adam_beta2), weight_decay=args.adam_weight_decay, @@ -777,7 +752,7 @@ def collate_fn(examples): # Backpropagate accelerator.backward(loss) if accelerator.sync_gradients: - params_to_clip = lora_layers.parameters() + params_to_clip = lora_layers accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm) optimizer.step() lr_scheduler.step() @@ -814,6 +789,15 @@ def collate_fn(examples): save_path = os.path.join(args.output_dir, f"checkpoint-{global_step}") accelerator.save_state(save_path) + + unet_lora_state_dict = get_peft_model_state_dict(unet) + + StableDiffusionPipeline.save_lora_weights( + save_directory=save_path, + unet_lora_layers=unet_lora_state_dict, + safe_serialization=True, + ) + logger.info(f"Saved state to {save_path}") logs = {"step_loss": loss.detach().item(), "lr": lr_scheduler.get_last_lr()[0]} @@ -869,7 +853,13 @@ def collate_fn(examples): accelerator.wait_for_everyone() if accelerator.is_main_process: unet = unet.to(torch.float32) - unet.save_attn_procs(args.output_dir) + + unet_lora_state_dict = get_peft_model_state_dict(unet) + DiffusionPipeline.save_pretrained( + args.output_dir, + unet_lora_layers=unet_lora_state_dict, + safe_serialization=True, + ) if args.push_to_hub: save_model_card( From b86543fe333fc3b759babe95fe1eb5181ac45cac Mon Sep 17 00:00:00 2001 From: Younes Belkada <49240599+younesbelkada@users.noreply.github.com> Date: Fri, 13 Oct 2023 18:03:52 +0200 Subject: [PATCH 02/25] Update examples/text_to_image/train_text_to_image_lora.py --- examples/text_to_image/train_text_to_image_lora.py | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index e1e9798afb69..44cdb60a2ec6 100644 --- 
a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -439,6 +439,7 @@ def main(): elif accelerator.mixed_precision == "bf16": weight_dtype = torch.bfloat16 + # Freeze the unet parameters before adding adapters for param in unet.parameters(): param.requires_grad_(False) From af99c1258337eff263419d470a8aff15b21d8c38 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 16 Oct 2023 21:46:58 +0000 Subject: [PATCH 03/25] fix --- examples/text_to_image/train_text_to_image_lora.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index 44cdb60a2ec6..3462c74be4b6 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -856,8 +856,8 @@ def collate_fn(examples): unet = unet.to(torch.float32) unet_lora_state_dict = get_peft_model_state_dict(unet) - DiffusionPipeline.save_pretrained( - args.output_dir, + StableDiffusionPipeline.save_lora_weights( + save_directory=args.output_dir, unet_lora_layers=unet_lora_state_dict, safe_serialization=True, ) From 89d4bed7af29ee5fe824061e043465893319471b Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 23 Oct 2023 16:17:48 +0000 Subject: [PATCH 04/25] add for SDXL --- .../text_to_image/train_text_to_image_lora.py | 4 +- .../train_text_to_image_lora_sdxl.py | 70 ++++--------------- 2 files changed, 15 insertions(+), 59 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index 3462c74be4b6..d25aa6047dcb 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -443,9 +443,7 @@ def main(): for param in unet.parameters(): param.requires_grad_(False) - unet_lora_config = LoraConfig( - r=args.rank, target_modules=["conv1", "conv2", "conv_shortcut", "proj_in", "proj_out"] - ) + unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v"]) # Move unet, vae and text_encoder to device and cast to weight_dtype unet.to(accelerator.device, dtype=weight_dtype) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index 35de6eedcabd..a523a78fd171 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -41,6 +41,8 @@ from torchvision.transforms.functional import crop from tqdm.auto import tqdm from transformers import AutoTokenizer, PretrainedConfig +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict import diffusers from diffusers import ( @@ -609,53 +611,17 @@ def main(args): # now we will add new LoRA weights to the attention layers # Set correct lora layers - unet_lora_parameters = [] - for attn_processor_name, attn_processor in unet.attn_processors.items(): - # Parse the attention module. - attn_module = unet - for n in attn_processor_name.split(".")[:-1]: - attn_module = getattr(attn_module, n) - - # Set the `lora_layer` attribute of the attention-related matrices. 
- attn_module.to_q.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_q.in_features, out_features=attn_module.to_q.out_features, rank=args.rank - ) - ) - attn_module.to_k.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_k.in_features, out_features=attn_module.to_k.out_features, rank=args.rank - ) - ) - attn_module.to_v.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_v.in_features, out_features=attn_module.to_v.out_features, rank=args.rank - ) - ) - attn_module.to_out[0].set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_out[0].in_features, - out_features=attn_module.to_out[0].out_features, - rank=args.rank, - ) - ) + unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v"]) - # Accumulate the LoRA params to optimize. - unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) + unet.add_adapter(unet_lora_config) # The text encoder comes from πŸ€— transformers, so we cannot directly modify it. # So, instead, we monkey-patch the forward calls of its attention-blocks. if args.train_text_encoder: # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 - text_lora_parameters_one = LoraLoaderMixin._modify_text_encoder( - text_encoder_one, dtype=torch.float32, rank=args.rank - ) - text_lora_parameters_two = LoraLoaderMixin._modify_text_encoder( - text_encoder_two, dtype=torch.float32, rank=args.rank - ) + text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj"]) + text_encoder_one.add_adapter(text_lora_config) + text_encoder_two.add_adapter(text_lora_config) # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): @@ -743,11 +709,7 @@ def load_model_hook(models, input_dir): optimizer_class = torch.optim.AdamW # Optimizer creation - params_to_optimize = ( - itertools.chain(unet_lora_parameters, text_lora_parameters_one, text_lora_parameters_two) - if args.train_text_encoder - else unet_lora_parameters - ) + params_to_optimize = filter(lambda p: p.requires_grad, unet.parameters()) optimizer = optimizer_class( params_to_optimize, lr=args.learning_rate, @@ -1081,12 +1043,7 @@ def compute_time_ids(original_size, crops_coords_top_left): # Backpropagate accelerator.backward(loss) if accelerator.sync_gradients: - params_to_clip = ( - itertools.chain(unet_lora_parameters, text_lora_parameters_one, text_lora_parameters_two) - if args.train_text_encoder - else unet_lora_parameters - ) - accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm) + accelerator.clip_grad_norm_(params_to_optimize, args.max_grad_norm) optimizer.step() lr_scheduler.step() optimizer.zero_grad() @@ -1181,20 +1138,21 @@ def compute_time_ids(original_size, crops_coords_top_left): accelerator.wait_for_everyone() if accelerator.is_main_process: unet = accelerator.unwrap_model(unet) - unet_lora_layers = unet_attn_processors_state_dict(unet) + unet_lora_state_dict = get_peft_model_state_dict(unet) if args.train_text_encoder: text_encoder_one = accelerator.unwrap_model(text_encoder_one) text_encoder_lora_layers = text_encoder_lora_state_dict(text_encoder_one) - text_encoder_two = accelerator.unwrap_model(text_encoder_two) - text_encoder_2_lora_layers = 
text_encoder_lora_state_dict(text_encoder_two) + + text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_one) + text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_2_lora_layers) else: text_encoder_lora_layers = None text_encoder_2_lora_layers = None StableDiffusionXLPipeline.save_lora_weights( save_directory=args.output_dir, - unet_lora_layers=unet_lora_layers, + unet_lora_layers=unet_lora_state_dict, text_encoder_lora_layers=text_encoder_lora_layers, text_encoder_2_lora_layers=text_encoder_2_lora_layers, ) From 428191373483df37f0be4ef3c0a29bdfdc739bcb Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 23 Oct 2023 16:18:19 +0000 Subject: [PATCH 05/25] oops --- examples/text_to_image/train_text_to_image_lora_sdxl.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index a523a78fd171..0987d38b1c79 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -16,7 +16,6 @@ """Fine-tuning script for Stable Diffusion XL for text2image with support for LoRA.""" import argparse -import itertools import logging import math import os @@ -37,12 +36,12 @@ from datasets import load_dataset from huggingface_hub import create_repo, upload_folder from packaging import version +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict from torchvision import transforms from torchvision.transforms.functional import crop from tqdm.auto import tqdm from transformers import AutoTokenizer, PretrainedConfig -from peft import LoraConfig -from peft.utils import get_peft_model_state_dict import diffusers from diffusers import ( @@ -52,7 +51,6 @@ UNet2DConditionModel, ) from diffusers.loaders import LoraLoaderMixin, text_encoder_lora_state_dict -from diffusers.models.lora import LoRALinearLayer from diffusers.optimization import get_scheduler from diffusers.training_utils import compute_snr from diffusers.utils import check_min_version, is_wandb_available @@ -1145,7 +1143,7 @@ def compute_time_ids(original_size, crops_coords_top_left): text_encoder_lora_layers = text_encoder_lora_state_dict(text_encoder_one) text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_one) - text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_2_lora_layers) + text_encoder_2_lora_layers = get_peft_model_state_dict(text_encoder_two) else: text_encoder_lora_layers = None text_encoder_2_lora_layers = None From 6a48ad050d01d54f3f9e5ba66bcc7a2d6197a9e1 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Sun, 5 Nov 2023 07:25:37 +0000 Subject: [PATCH 06/25] make sure to install peft --- .github/workflows/pr_tests.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/pr_tests.yml b/.github/workflows/pr_tests.yml index aaaea147f7ab..a75480b868b2 100644 --- a/.github/workflows/pr_tests.yml +++ b/.github/workflows/pr_tests.yml @@ -113,6 +113,7 @@ jobs: - name: Run example PyTorch CPU tests if: ${{ matrix.config.framework == 'pytorch_examples' }} run: | + python -m pip install peft python -m pytest -n 2 --max-worker-restart=0 --dist=loadfile \ --make-reports=tests_${{ matrix.config.report }} \ examples/test_examples.py From 069a929f95920228dc516d28b81c9fd45cf349c6 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Sun, 5 Nov 2023 07:41:47 +0000 Subject: [PATCH 07/25] fix --- examples/text_to_image/train_text_to_image_lora_sdxl.py | 2 +- 1 file 
changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index 466550dd1981..c3495a361059 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -1141,7 +1141,7 @@ def compute_time_ids(original_size, crops_coords_top_left): if args.train_text_encoder: text_encoder_one = accelerator.unwrap_model(text_encoder_one) - text_encoder_lora_layers = text_encoder_lora_state_dict(text_encoder_one) + text_encoder_two = accelerator.unwrap_model(text_encoder_two) text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_one) text_encoder_2_lora_layers = get_peft_model_state_dict(text_encoder_two) From e4b0f1dcc60fa9597721c3df1ad108886f0a84ac Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 13:38:27 +0000 Subject: [PATCH 08/25] fix --- examples/text_to_image/train_text_to_image_lora_sdxl.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index b9920ea8fcec..12ca4bef6000 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -51,7 +51,6 @@ UNet2DConditionModel, ) from diffusers.loaders import LoraLoaderMixin -from diffusers.models.lora import LoRALinearLayer, text_encoder_lora_state_dict from diffusers.optimization import get_scheduler from diffusers.training_utils import compute_snr from diffusers.utils import check_min_version, is_wandb_available @@ -619,7 +618,7 @@ def main(args): # So, instead, we monkey-patch the forward calls of its attention-blocks. 
if args.train_text_encoder: # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 - text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj"]) + text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) text_encoder_one.add_adapter(text_lora_config) text_encoder_two.add_adapter(text_lora_config) @@ -634,11 +633,11 @@ def save_model_hook(models, weights, output_dir): for model in models: if isinstance(model, type(accelerator.unwrap_model(unet))): - unet_lora_layers_to_save = unet_attn_processors_state_dict(model) + unet_lora_layers_to_save = get_peft_model_state_dict(model) elif isinstance(model, type(accelerator.unwrap_model(text_encoder_one))): - text_encoder_one_lora_layers_to_save = text_encoder_lora_state_dict(model) + text_encoder_one_lora_layers_to_save = get_peft_model_state_dict(model) elif isinstance(model, type(accelerator.unwrap_model(text_encoder_two))): - text_encoder_two_lora_layers_to_save = text_encoder_lora_state_dict(model) + text_encoder_two_lora_layers_to_save = get_peft_model_state_dict(model) else: raise ValueError(f"unexpected save model: {model.__class__}") From 62c33c0ba8c09bb6929a071fe3bffd490ccacaa2 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 13:50:52 +0000 Subject: [PATCH 09/25] fix dreambooth and lora --- examples/dreambooth/train_dreambooth_lora.py | 107 ++++-------------- .../train_text_to_image_lora_sdxl.py | 7 +- 2 files changed, 23 insertions(+), 91 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index d10e62ac8def..911a7023ac45 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -63,6 +63,9 @@ from diffusers.utils import check_min_version, is_wandb_available from diffusers.utils.import_utils import is_xformers_available +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict + # Will error if the minimal version of diffusers is not installed. Remove at your own risks. check_min_version("0.24.0.dev0") @@ -823,80 +826,16 @@ def main(args): if args.train_text_encoder: text_encoder.gradient_checkpointing_enable() - # now we will add new LoRA weights to the attention layers - # It's important to realize here how many attention weights will be added and of which sizes - # The sizes of the attention layers consist only of two different variables: - # 1) - the "hidden_size", which is increased according to `unet.config.block_out_channels`. - # 2) - the "cross attention size", which is set to `unet.config.cross_attention_dim`. - - # Let's first see how many attention processors we will have to set. - # For Stable Diffusion, it should be equal to: - # - down blocks (2x attention layers) * (2x transformer layers) * (3x down blocks) = 12 - # - mid blocks (2x attention layers) * (1x transformer layers) * (1x mid blocks) = 2 - # - up blocks (2x attention layers) * (3x transformer layers) * (3x up blocks) = 18 - # => 32 layers - - # Set correct lora layers - unet_lora_parameters = [] - for attn_processor_name, attn_processor in unet.attn_processors.items(): - # Parse the attention module. - attn_module = unet - for n in attn_processor_name.split(".")[:-1]: - attn_module = getattr(attn_module, n) - - # Set the `lora_layer` attribute of the attention-related matrices. 
- attn_module.to_q.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_q.in_features, out_features=attn_module.to_q.out_features, rank=args.rank - ) - ) - attn_module.to_k.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_k.in_features, out_features=attn_module.to_k.out_features, rank=args.rank - ) - ) - attn_module.to_v.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_v.in_features, out_features=attn_module.to_v.out_features, rank=args.rank - ) - ) - attn_module.to_out[0].set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_out[0].in_features, - out_features=attn_module.to_out[0].out_features, - rank=args.rank, - ) - ) - # Accumulate the LoRA params to optimize. - unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) - - if isinstance(attn_processor, (AttnAddedKVProcessor, SlicedAttnAddedKVProcessor, AttnAddedKVProcessor2_0)): - attn_module.add_k_proj.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.add_k_proj.in_features, - out_features=attn_module.add_k_proj.out_features, - rank=args.rank, - ) - ) - attn_module.add_v_proj.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.add_v_proj.in_features, - out_features=attn_module.add_v_proj.out_features, - rank=args.rank, - ) - ) - unet_lora_parameters.extend(attn_module.add_k_proj.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.add_v_proj.lora_layer.parameters()) + # now we will add new LoRA weights to the attention layers + unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v", "add_k_proj", "add_v_proj"]) + unet.add_adapter(unet_lora_config) - # The text encoder comes from πŸ€— transformers, so we cannot directly modify it. - # So, instead, we monkey-patch the forward calls of its attention-blocks. + # The text encoder comes from πŸ€— transformers, we will also attach adapters to it. 
if args.train_text_encoder: # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 - text_lora_parameters = LoraLoaderMixin._modify_text_encoder(text_encoder, dtype=torch.float32, rank=args.rank) + text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) + text_encoder.add_adapter(text_lora_config) # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): @@ -970,11 +909,10 @@ def load_model_hook(models, input_dir): optimizer_class = torch.optim.AdamW # Optimizer creation - params_to_optimize = ( - itertools.chain(unet_lora_parameters, text_lora_parameters) - if args.train_text_encoder - else unet_lora_parameters - ) + params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) + if args.train_text_encoder: + params_to_optimize = params_to_optimize + list(filter(lambda p: p.requires_grad, text_encoder.parameters())) + optimizer = optimizer_class( params_to_optimize, lr=args.learning_rate, @@ -1217,12 +1155,7 @@ def compute_text_embeddings(prompt): accelerator.backward(loss) if accelerator.sync_gradients: - params_to_clip = ( - itertools.chain(unet_lora_parameters, text_lora_parameters) - if args.train_text_encoder - else unet_lora_parameters - ) - accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm) + accelerator.clip_grad_norm_(params_to_optimize, args.max_grad_norm) optimizer.step() lr_scheduler.step() optimizer.zero_grad() @@ -1344,18 +1277,16 @@ def compute_text_embeddings(prompt): if accelerator.is_main_process: unet = accelerator.unwrap_model(unet) unet = unet.to(torch.float32) - unet_lora_layers = unet_lora_state_dict(unet) - if text_encoder is not None and args.train_text_encoder: + unet_lora_state_dict = get_peft_model_state_dict(unet) + + if args.train_text_encoder: text_encoder = accelerator.unwrap_model(text_encoder) - text_encoder = text_encoder.to(torch.float32) - text_encoder_lora_layers = text_encoder_lora_state_dict(text_encoder) - else: - text_encoder_lora_layers = None + text_encoder_lora_layers = get_peft_model_state_dict(text_encoder) LoraLoaderMixin.save_lora_weights( save_directory=args.output_dir, - unet_lora_layers=unet_lora_layers, + unet_lora_layers=unet_lora_state_dict, text_encoder_lora_layers=text_encoder_lora_layers, ) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index 12ca4bef6000..0e3f65842bb7 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -614,8 +614,7 @@ def main(args): unet.add_adapter(unet_lora_config) - # The text encoder comes from πŸ€— transformers, so we cannot directly modify it. - # So, instead, we monkey-patch the forward calls of its attention-blocks. + # The text encoder comes from πŸ€— transformers, we will also attach adapters to it. 
if args.train_text_encoder: # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) @@ -708,7 +707,9 @@ def load_model_hook(models, input_dir): optimizer_class = torch.optim.AdamW # Optimizer creation - params_to_optimize = filter(lambda p: p.requires_grad, unet.parameters()) + params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) + if args.train_text_encoder: + params_to_optimize = params_to_optimize + list(filter(lambda p: p.requires_grad, text_encoder_one.parameters())) + list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) optimizer = optimizer_class( params_to_optimize, lr=args.learning_rate, From a1e1cdffb9a41cb71dceeed7f32427814f42f2d8 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 13:51:53 +0000 Subject: [PATCH 10/25] more fixes --- examples/dreambooth/train_dreambooth_lora.py | 18 ++++-------------- .../train_text_to_image_lora_sdxl.py | 6 +++++- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index 911a7023ac45..e43a8135bba8 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -17,7 +17,6 @@ import copy import gc import hashlib -import itertools import logging import math import os @@ -35,6 +34,8 @@ from accelerate.utils import ProjectConfiguration, set_seed from huggingface_hub import create_repo, upload_folder from packaging import version +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict from PIL import Image from PIL.ImageOps import exif_transpose from torch.utils.data import Dataset @@ -52,20 +53,10 @@ UNet2DConditionModel, ) from diffusers.loaders import LoraLoaderMixin -from diffusers.models.attention_processor import ( - AttnAddedKVProcessor, - AttnAddedKVProcessor2_0, - SlicedAttnAddedKVProcessor, -) -from diffusers.models.lora import LoRALinearLayer, text_encoder_lora_state_dict from diffusers.optimization import get_scheduler -from diffusers.training_utils import unet_lora_state_dict from diffusers.utils import check_min_version, is_wandb_available from diffusers.utils.import_utils import is_xformers_available -from peft import LoraConfig -from peft.utils import get_peft_model_state_dict - # Will error if the minimal version of diffusers is not installed. Remove at your own risks. 
check_min_version("0.24.0.dev0") @@ -826,7 +817,6 @@ def main(args): if args.train_text_encoder: text_encoder.gradient_checkpointing_enable() - # now we will add new LoRA weights to the attention layers unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v", "add_k_proj", "add_v_proj"]) unet.add_adapter(unet_lora_config) @@ -847,9 +837,9 @@ def save_model_hook(models, weights, output_dir): for model in models: if isinstance(model, type(accelerator.unwrap_model(unet))): - unet_lora_layers_to_save = unet_lora_state_dict(model) + unet_lora_layers_to_save = get_peft_model_state_dict(model) elif isinstance(model, type(accelerator.unwrap_model(text_encoder))): - text_encoder_lora_layers_to_save = text_encoder_lora_state_dict(model) + text_encoder_lora_layers_to_save = get_peft_model_state_dict(model) else: raise ValueError(f"unexpected save model: {model.__class__}") diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index 0e3f65842bb7..d6485ef3dda9 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -709,7 +709,11 @@ def load_model_hook(models, input_dir): # Optimizer creation params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) if args.train_text_encoder: - params_to_optimize = params_to_optimize + list(filter(lambda p: p.requires_grad, text_encoder_one.parameters())) + list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) + params_to_optimize = ( + params_to_optimize + + list(filter(lambda p: p.requires_grad, text_encoder_one.parameters())) + + list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) + ) optimizer = optimizer_class( params_to_optimize, lr=args.learning_rate, From c3d3002d3ac9dc32da7303631b1ee9811eb2503f Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 13:52:44 +0000 Subject: [PATCH 11/25] add peft to requirements.txt --- examples/dreambooth/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/dreambooth/requirements.txt b/examples/dreambooth/requirements.txt index 7a612982f4ab..75bf0a4a7920 100644 --- a/examples/dreambooth/requirements.txt +++ b/examples/dreambooth/requirements.txt @@ -4,3 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 +peft \ No newline at end of file From 340150b1010dd4e2ee7ff7e7f52a5ef48047927d Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 14:20:35 +0000 Subject: [PATCH 12/25] fix --- examples/dreambooth/train_dreambooth_lora.py | 2 + .../dreambooth/train_dreambooth_lora_sdxl.py | 85 +++++-------------- 2 files changed, 22 insertions(+), 65 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index e43a8135bba8..809e769d5d1a 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -1273,6 +1273,8 @@ def compute_text_embeddings(prompt): if args.train_text_encoder: text_encoder = accelerator.unwrap_model(text_encoder) text_encoder_lora_layers = get_peft_model_state_dict(text_encoder) + else: + text_encoder_lora_layers = None LoraLoaderMixin.save_lora_weights( save_directory=args.output_dir, diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index ef2020398b2d..dbc4babb85d5 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ 
b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -16,7 +16,6 @@ import argparse import gc import hashlib -import itertools import logging import math import os @@ -34,6 +33,8 @@ from accelerate.utils import DistributedDataParallelKwargs, ProjectConfiguration, set_seed from huggingface_hub import create_repo, upload_folder from packaging import version +from peft import LoraConfig +from peft.utils import get_peft_model_state_dict from PIL import Image from PIL.ImageOps import exif_transpose from torch.utils.data import Dataset @@ -50,9 +51,7 @@ UNet2DConditionModel, ) from diffusers.loaders import LoraLoaderMixin -from diffusers.models.lora import LoRALinearLayer, text_encoder_lora_state_dict from diffusers.optimization import get_scheduler -from diffusers.training_utils import unet_lora_state_dict from diffusers.utils import check_min_version, is_wandb_available from diffusers.utils.import_utils import is_xformers_available @@ -745,54 +744,15 @@ def main(args): text_encoder_two.gradient_checkpointing_enable() # now we will add new LoRA weights to the attention layers - # Set correct lora layers - unet_lora_parameters = [] - for attn_processor_name, attn_processor in unet.attn_processors.items(): - # Parse the attention module. - attn_module = unet - for n in attn_processor_name.split(".")[:-1]: - attn_module = getattr(attn_module, n) - - # Set the `lora_layer` attribute of the attention-related matrices. - attn_module.to_q.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_q.in_features, out_features=attn_module.to_q.out_features, rank=args.rank - ) - ) - attn_module.to_k.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_k.in_features, out_features=attn_module.to_k.out_features, rank=args.rank - ) - ) - attn_module.to_v.set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_v.in_features, out_features=attn_module.to_v.out_features, rank=args.rank - ) - ) - attn_module.to_out[0].set_lora_layer( - LoRALinearLayer( - in_features=attn_module.to_out[0].in_features, - out_features=attn_module.to_out[0].out_features, - rank=args.rank, - ) - ) - - # Accumulate the LoRA params to optimize. - unet_lora_parameters.extend(attn_module.to_q.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_k.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_v.lora_layer.parameters()) - unet_lora_parameters.extend(attn_module.to_out[0].lora_layer.parameters()) + unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v", "to_out.0"]) + unet.add_adapter(unet_lora_config) # The text encoder comes from πŸ€— transformers, so we cannot directly modify it. # So, instead, we monkey-patch the forward calls of its attention-blocks. 
if args.train_text_encoder: - # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 - text_lora_parameters_one = LoraLoaderMixin._modify_text_encoder( - text_encoder_one, dtype=torch.float32, rank=args.rank - ) - text_lora_parameters_two = LoraLoaderMixin._modify_text_encoder( - text_encoder_two, dtype=torch.float32, rank=args.rank - ) + text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) + text_encoder_one.add_adapter(text_lora_config) + text_encoder_two.add_adapter(text_lora_config) # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format def save_model_hook(models, weights, output_dir): @@ -805,11 +765,11 @@ def save_model_hook(models, weights, output_dir): for model in models: if isinstance(model, type(accelerator.unwrap_model(unet))): - unet_lora_layers_to_save = unet_lora_state_dict(model) + unet_lora_layers_to_save = get_peft_model_state_dict(model) elif isinstance(model, type(accelerator.unwrap_model(text_encoder_one))): - text_encoder_one_lora_layers_to_save = text_encoder_lora_state_dict(model) + text_encoder_one_lora_layers_to_save = get_peft_model_state_dict(model) elif isinstance(model, type(accelerator.unwrap_model(text_encoder_two))): - text_encoder_two_lora_layers_to_save = text_encoder_lora_state_dict(model) + text_encoder_two_lora_layers_to_save = get_peft_model_state_dict(model) else: raise ValueError(f"unexpected save model: {model.__class__}") @@ -879,12 +839,12 @@ def load_model_hook(models, input_dir): else: optimizer_class = torch.optim.AdamW - # Optimizer creation - params_to_optimize = ( - itertools.chain(unet_lora_parameters, text_lora_parameters_one, text_lora_parameters_two) - if args.train_text_encoder - else unet_lora_parameters - ) + params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) + if args.train_text_encoder: + params_to_optimize = params_to_optimize + list( + filter(lambda p: p.requires_grad, [text_encoder_one.parameters(), text_encoder_two.parameters()]) + ) + optimizer = optimizer_class( params_to_optimize, lr=args.learning_rate, @@ -1155,12 +1115,7 @@ def compute_text_embeddings(prompt, text_encoders, tokenizers): accelerator.backward(loss) if accelerator.sync_gradients: - params_to_clip = ( - itertools.chain(unet_lora_parameters, text_lora_parameters_one, text_lora_parameters_two) - if args.train_text_encoder - else unet_lora_parameters - ) - accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm) + accelerator.clip_grad_norm_(params_to_optimize, args.max_grad_norm) optimizer.step() lr_scheduler.step() optimizer.zero_grad() @@ -1277,13 +1232,13 @@ def compute_text_embeddings(prompt, text_encoders, tokenizers): if accelerator.is_main_process: unet = accelerator.unwrap_model(unet) unet = unet.to(torch.float32) - unet_lora_layers = unet_lora_state_dict(unet) + unet_lora_layers = get_peft_model_state_dict(unet) if args.train_text_encoder: text_encoder_one = accelerator.unwrap_model(text_encoder_one) - text_encoder_lora_layers = text_encoder_lora_state_dict(text_encoder_one.to(torch.float32)) + text_encoder_lora_layers = get_peft_model_state_dict(text_encoder_one.to(torch.float32)) text_encoder_two = accelerator.unwrap_model(text_encoder_two) - text_encoder_2_lora_layers = text_encoder_lora_state_dict(text_encoder_two.to(torch.float32)) + text_encoder_2_lora_layers = get_peft_model_state_dict(text_encoder_two.to(torch.float32)) else: text_encoder_lora_layers = None 
text_encoder_2_lora_layers = None From dff2995fd0f7f9c25076efd77f3a81ba3d6a9b57 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Tue, 14 Nov 2023 14:42:39 +0000 Subject: [PATCH 13/25] final fix --- examples/dreambooth/train_dreambooth_lora_sdxl.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index dbc4babb85d5..6eec58080741 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -841,8 +841,10 @@ def load_model_hook(models, input_dir): params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) if args.train_text_encoder: - params_to_optimize = params_to_optimize + list( - filter(lambda p: p.requires_grad, [text_encoder_one.parameters(), text_encoder_two.parameters()]) + params_to_optimize = ( + params_to_optimize + + list(filter(lambda p: p.requires_grad, text_encoder_one.parameters())) + + list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) ) optimizer = optimizer_class( From 978d0cd6c6df998065b17ef949689fe6741cd3dc Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 16 Nov 2023 08:38:44 +0000 Subject: [PATCH 14/25] add peft version in requirements --- examples/dreambooth/requirements.txt | 2 +- examples/text_to_image/requirements.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/requirements.txt b/examples/dreambooth/requirements.txt index 75bf0a4a7920..bf5ce39b8682 100644 --- a/examples/dreambooth/requirements.txt +++ b/examples/dreambooth/requirements.txt @@ -4,4 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 -peft \ No newline at end of file +peft>=0.6.0 \ No newline at end of file diff --git a/examples/text_to_image/requirements.txt b/examples/text_to_image/requirements.txt index 31b9026efdc2..6eb46d193dae 100644 --- a/examples/text_to_image/requirements.txt +++ b/examples/text_to_image/requirements.txt @@ -5,3 +5,4 @@ datasets ftfy tensorboard Jinja2 +peft>=0.6.0 \ No newline at end of file From f17140475d9f5c74b7e49e10bcedcd1fb6cb629b Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 16 Nov 2023 08:39:30 +0000 Subject: [PATCH 15/25] remove comment --- examples/dreambooth/train_dreambooth_lora.py | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index 809e769d5d1a..7cef96ccbafe 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -823,7 +823,6 @@ def main(args): # The text encoder comes from πŸ€— transformers, we will also attach adapters to it. 
if args.train_text_encoder: - # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) text_encoder.add_adapter(text_lora_config) From a2f3f202bb5ea859dc36be02b244497bdb4610d0 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 16 Nov 2023 08:40:36 +0000 Subject: [PATCH 16/25] change variable names --- examples/dreambooth/train_dreambooth_lora.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index 7cef96ccbafe..e8f511cae601 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -1271,14 +1271,14 @@ def compute_text_embeddings(prompt): if args.train_text_encoder: text_encoder = accelerator.unwrap_model(text_encoder) - text_encoder_lora_layers = get_peft_model_state_dict(text_encoder) + text_encoder_state_dict = get_peft_model_state_dict(text_encoder) else: - text_encoder_lora_layers = None + text_encoder_state_dict = None LoraLoaderMixin.save_lora_weights( save_directory=args.output_dir, unet_lora_layers=unet_lora_state_dict, - text_encoder_lora_layers=text_encoder_lora_layers, + text_encoder_lora_layers=text_encoder_state_dict, ) # Final inference From 14b0dd2e38c5cb87c3a30799465ece6e43fcadc0 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 16 Nov 2023 08:42:52 +0000 Subject: [PATCH 17/25] add few lines in readme --- examples/dreambooth/README.md | 1 + examples/dreambooth/README_sdxl.md | 1 + examples/text_to_image/README.md | 2 ++ examples/text_to_image/README_sdxl.md | 1 + 4 files changed, 5 insertions(+) diff --git a/examples/dreambooth/README.md b/examples/dreambooth/README.md index 0579e337939d..972fe6e8cffb 100644 --- a/examples/dreambooth/README.md +++ b/examples/dreambooth/README.md @@ -44,6 +44,7 @@ write_basic_config() ``` When running `accelerate config`, if we specify torch compile mode to True there can be dramatic speedups. +Note also that we use PEFT library as backend for LoRA training, make sure to have `peft>=0.6.0` installed in your environment. ### Dog toy example diff --git a/examples/dreambooth/README_sdxl.md b/examples/dreambooth/README_sdxl.md index d78d1ef5d2dd..66232d3063f5 100644 --- a/examples/dreambooth/README_sdxl.md +++ b/examples/dreambooth/README_sdxl.md @@ -47,6 +47,7 @@ write_basic_config() ``` When running `accelerate config`, if we specify torch compile mode to True there can be dramatic speedups. +Note also that we use PEFT library as backend for LoRA training, make sure to have `peft>=0.6.0` installed in your environment. ### Dog toy example diff --git a/examples/text_to_image/README.md b/examples/text_to_image/README.md index 7b9f4013c746..e2cbaca2a9d8 100644 --- a/examples/text_to_image/README.md +++ b/examples/text_to_image/README.md @@ -32,6 +32,8 @@ And initialize an [πŸ€—Accelerate](https://github.com/huggingface/accelerate/) e accelerate config ``` +Note also that we use PEFT library as backend for LoRA training, make sure to have `peft>=0.6.0` installed in your environment. + ### Pokemon example You need to accept the model license before downloading or using the weights. In this example we'll use model version `v1-4`, so you'll need to visit [its card](https://huggingface.co/CompVis/stable-diffusion-v1-4), read the license and tick the checkbox if you agree. 
diff --git a/examples/text_to_image/README_sdxl.md b/examples/text_to_image/README_sdxl.md index 75c9cb126472..1278185ddf1f 100644 --- a/examples/text_to_image/README_sdxl.md +++ b/examples/text_to_image/README_sdxl.md @@ -45,6 +45,7 @@ write_basic_config() ``` When running `accelerate config`, if we specify torch compile mode to True there can be dramatic speedups. +Note also that we use PEFT library as backend for LoRA training, make sure to have `peft>=0.6.0` installed in your environment. ### Training From b21064f68ffad648455da116ba4b6bb669d1a223 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Fri, 17 Nov 2023 13:32:57 +0000 Subject: [PATCH 18/25] add to reqs --- examples/dreambooth/requirements_sdxl.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/examples/dreambooth/requirements_sdxl.txt b/examples/dreambooth/requirements_sdxl.txt index 7a612982f4ab..bf5ce39b8682 100644 --- a/examples/dreambooth/requirements_sdxl.txt +++ b/examples/dreambooth/requirements_sdxl.txt @@ -4,3 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 +peft>=0.6.0 \ No newline at end of file From b4e108b103951cca3624124e61c64eb13b57a518 Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Mon, 20 Nov 2023 14:14:54 +0000 Subject: [PATCH 19/25] style --- examples/dreambooth/train_dreambooth_lora.py | 2 -- examples/dreambooth/train_dreambooth_lora_sdxl.py | 2 -- 2 files changed, 4 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index b39c800ac72a..537f79e2f1fa 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -16,8 +16,6 @@ import argparse import copy import gc -import hashlib -import itertools import logging import math import os diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index 9abc2c9192c9..5dbffedd0014 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -15,8 +15,6 @@ import argparse import gc -import hashlib -import itertools import logging import math import os From 75c3948b649598f7005133b05d9b2f6895657abf Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 23 Nov 2023 15:04:20 +0000 Subject: [PATCH 20/25] fix issues --- examples/dreambooth/train_dreambooth_lora_sdxl.py | 9 ++++++++- examples/text_to_image/train_text_to_image_lora.py | 4 ++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index b8238d0a128b..a9b3fc91abcc 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -15,6 +15,7 @@ import argparse import gc +import itertools import logging import math import os @@ -51,8 +52,8 @@ UNet2DConditionModel, ) from diffusers.loaders import LoraLoaderMixin - from diffusers.optimization import get_scheduler +from diffusers.training_utils import compute_snr from diffusers.utils import check_min_version, is_wandb_available from diffusers.utils.import_utils import is_xformers_available @@ -1073,6 +1074,12 @@ def load_model_hook(models, input_dir): args.learning_rate * args.gradient_accumulation_steps * args.train_batch_size * accelerator.num_processes ) + unet_lora_parameters = list(filter(lambda p: p.requires_grad, unet.parameters())) + + if args.train_text_encoder: + text_lora_parameters_one = list(filter(lambda p: p.requires_grad, 
text_encoder_one.parameters())) + text_lora_parameters_two = list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) + # Optimization parameters unet_lora_parameters_with_lr = {"params": unet_lora_parameters, "lr": args.learning_rate} if args.train_text_encoder: diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index 32faa69998ea..6b1e0033e0db 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -651,8 +651,8 @@ def collate_fn(examples): ) # Prepare everything with our `accelerator`. - unet_lora_parameters, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( - unet_lora_parameters, optimizer, train_dataloader, lr_scheduler + unet, optimizer, train_dataloader, lr_scheduler = accelerator.prepare( + unet, optimizer, train_dataloader, lr_scheduler ) # We need to recalculate our total training steps as the size of the training dataloader may have changed. From 1e94c4b7f617a596df37bfa6110efeb004aa9b0e Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Thu, 23 Nov 2023 15:47:46 +0000 Subject: [PATCH 21/25] fix lora dreambooth xl tests --- .../dreambooth/train_dreambooth_lora_sdxl.py | 33 +++++++++---------- 1 file changed, 15 insertions(+), 18 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index a9b3fc91abcc..4fa7bdc21d34 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -1144,25 +1144,17 @@ def load_model_hook(models, input_dir): optimizer_class = prodigyopt.Prodigy - params_to_optimize = list(filter(lambda p: p.requires_grad, unet.parameters())) - if args.train_text_encoder: - params_to_optimize = ( - params_to_optimize - + list(filter(lambda p: p.requires_grad, text_encoder_one.parameters())) - + list(filter(lambda p: p.requires_grad, text_encoder_two.parameters())) + optimizer = optimizer_class( + params_to_optimize, + lr=args.learning_rate, + betas=(args.adam_beta1, args.adam_beta2), + weight_decay=args.adam_weight_decay, + eps=args.adam_epsilon, + decouple=args.prodigy_decouple, + use_bias_correction=args.prodigy_use_bias_correction, + safeguard_warmup=args.prodigy_safeguard_warmup, ) - optimizer = optimizer_class( - params_to_optimize, - lr=args.learning_rate, - betas=(args.adam_beta1, args.adam_beta2), - weight_decay=args.adam_weight_decay, - eps=args.adam_epsilon, - decouple=args.prodigy_decouple, - use_bias_correction=args.prodigy_use_bias_correction, - safeguard_warmup=args.prodigy_safeguard_warmup, - ) - # Dataset and DataLoaders creation: train_dataset = DreamBoothDataset( instance_data_root=args.instance_data_dir, @@ -1472,7 +1464,12 @@ def compute_text_embeddings(prompt, text_encoders, tokenizers): accelerator.backward(loss) if accelerator.sync_gradients: - accelerator.clip_grad_norm_(params_to_optimize, args.max_grad_norm) + params_to_clip = ( + itertools.chain(unet_lora_parameters, text_lora_parameters_one, text_lora_parameters_two) + if args.train_text_encoder + else unet_lora_parameters + ) + accelerator.clip_grad_norm_(params_to_clip, args.max_grad_norm) optimizer.step() lr_scheduler.step() optimizer.zero_grad() From ada6ad896ed92bc27bf1ca8163c9b1a012bfb1b8 Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 29 Nov 2023 07:44:41 +0530 Subject: [PATCH 22/25] init_lora_weights to gaussian and add out proj where missing --- 
examples/dreambooth/train_dreambooth_lora.py | 10 ++++++++-- examples/dreambooth/train_dreambooth_lora_sdxl.py | 8 ++++++-- examples/text_to_image/train_text_to_image_lora.py | 4 +++- .../text_to_image/train_text_to_image_lora_sdxl.py | 8 ++++++-- 4 files changed, 23 insertions(+), 7 deletions(-) diff --git a/examples/dreambooth/train_dreambooth_lora.py b/examples/dreambooth/train_dreambooth_lora.py index 1962f9cf4dfd..b96cb01b442e 100644 --- a/examples/dreambooth/train_dreambooth_lora.py +++ b/examples/dreambooth/train_dreambooth_lora.py @@ -858,12 +858,18 @@ def main(args): text_encoder.gradient_checkpointing_enable() # now we will add new LoRA weights to the attention layers - unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v", "add_k_proj", "add_v_proj"]) + unet_lora_config = LoraConfig( + r=args.rank, + init_lora_weights="gaussian", + target_modules=["to_k", "to_q", "to_v", "to_out.0", "add_k_proj", "add_v_proj"], + ) unet.add_adapter(unet_lora_config) # The text encoder comes from πŸ€— transformers, we will also attach adapters to it. if args.train_text_encoder: - text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) + text_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["q_proj", "k_proj", "v_proj", "out_proj"] + ) text_encoder.add_adapter(text_lora_config) # create custom saving & loading hooks so that `accelerator.save_state(...)` serializes in a nice format diff --git a/examples/dreambooth/train_dreambooth_lora_sdxl.py b/examples/dreambooth/train_dreambooth_lora_sdxl.py index 529eec1c53b2..6a5c1ca9a642 100644 --- a/examples/dreambooth/train_dreambooth_lora_sdxl.py +++ b/examples/dreambooth/train_dreambooth_lora_sdxl.py @@ -1010,13 +1010,17 @@ def main(args): text_encoder_two.gradient_checkpointing_enable() # now we will add new LoRA weights to the attention layers - unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v", "to_out.0"]) + unet_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["to_k", "to_q", "to_v", "to_out.0"] + ) unet.add_adapter(unet_lora_config) # The text encoder comes from πŸ€— transformers, so we cannot directly modify it. # So, instead, we monkey-patch the forward calls of its attention-blocks. 
if args.train_text_encoder: - text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) + text_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["q_proj", "k_proj", "v_proj", "out_proj"] + ) text_encoder_one.add_adapter(text_lora_config) text_encoder_two.add_adapter(text_lora_config) diff --git a/examples/text_to_image/train_text_to_image_lora.py b/examples/text_to_image/train_text_to_image_lora.py index 47eb0abd5bf3..d90441ff4de9 100644 --- a/examples/text_to_image/train_text_to_image_lora.py +++ b/examples/text_to_image/train_text_to_image_lora.py @@ -484,7 +484,9 @@ def main(): for param in unet.parameters(): param.requires_grad_(False) - unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v"]) + unet_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["to_k", "to_q", "to_v", "to_out.0"] + ) # Move unet, vae and text_encoder to device and cast to weight_dtype unet.to(accelerator.device, dtype=weight_dtype) diff --git a/examples/text_to_image/train_text_to_image_lora_sdxl.py b/examples/text_to_image/train_text_to_image_lora_sdxl.py index 7f76057e9973..d025d6548cc5 100644 --- a/examples/text_to_image/train_text_to_image_lora_sdxl.py +++ b/examples/text_to_image/train_text_to_image_lora_sdxl.py @@ -658,14 +658,18 @@ def main(args): # now we will add new LoRA weights to the attention layers # Set correct lora layers - unet_lora_config = LoraConfig(r=args.rank, target_modules=["to_k", "to_q", "to_v"]) + unet_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["to_k", "to_q", "to_v", "to_out.0"] + ) unet.add_adapter(unet_lora_config) # The text encoder comes from πŸ€— transformers, we will also attach adapters to it. if args.train_text_encoder: # ensure that dtype is float32, even if rest of the model that isn't trained is loaded in fp16 - text_lora_config = LoraConfig(r=args.rank, target_modules=["q_proj", "k_proj", "v_proj", "out_proj"]) + text_lora_config = LoraConfig( + r=args.rank, init_lora_weights="gaussian", target_modules=["q_proj", "k_proj", "v_proj", "out_proj"] + ) text_encoder_one.add_adapter(text_lora_config) text_encoder_two.add_adapter(text_lora_config) From 252dcdac0537aee30b0a56db18bbcca7a927a72e Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 29 Nov 2023 07:59:05 +0530 Subject: [PATCH 23/25] ammend requirements. 
--- examples/dreambooth/requirements_sdxl.txt | 2 +- examples/text_to_image/requirements_sdxl.txt | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/dreambooth/requirements_sdxl.txt b/examples/dreambooth/requirements_sdxl.txt index bf5ce39b8682..2f7b5060971a 100644 --- a/examples/dreambooth/requirements_sdxl.txt +++ b/examples/dreambooth/requirements_sdxl.txt @@ -4,4 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 -peft>=0.6.0 \ No newline at end of file +peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file diff --git a/examples/text_to_image/requirements_sdxl.txt b/examples/text_to_image/requirements_sdxl.txt index cdd3336e3617..476e1d873d27 100644 --- a/examples/text_to_image/requirements_sdxl.txt +++ b/examples/text_to_image/requirements_sdxl.txt @@ -5,3 +5,4 @@ ftfy tensorboard Jinja2 datasets +peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file From 90b760a61e08250e8e8ed3b53ce708f2fff4096f Mon Sep 17 00:00:00 2001 From: sayakpaul Date: Wed, 29 Nov 2023 08:02:32 +0530 Subject: [PATCH 24/25] ammend requirements.txt --- examples/dreambooth/requirements.txt | 2 +- examples/text_to_image/requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/dreambooth/requirements.txt b/examples/dreambooth/requirements.txt index bf5ce39b8682..2f7b5060971a 100644 --- a/examples/dreambooth/requirements.txt +++ b/examples/dreambooth/requirements.txt @@ -4,4 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 -peft>=0.6.0 \ No newline at end of file +peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file diff --git a/examples/text_to_image/requirements.txt b/examples/text_to_image/requirements.txt index 6eb46d193dae..9394ad3354c6 100644 --- a/examples/text_to_image/requirements.txt +++ b/examples/text_to_image/requirements.txt @@ -5,4 +5,4 @@ datasets ftfy tensorboard Jinja2 -peft>=0.6.0 \ No newline at end of file +peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file From 57516edb416bcbe13d61211d660773d334cbe5aa Mon Sep 17 00:00:00 2001 From: younesbelkada Date: Wed, 6 Dec 2023 16:27:36 +0000 Subject: [PATCH 25/25] add correct peft versions --- examples/dreambooth/requirements.txt | 2 +- examples/dreambooth/requirements_sdxl.txt | 2 +- examples/text_to_image/requirements.txt | 2 +- examples/text_to_image/requirements_sdxl.txt | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/dreambooth/requirements.txt b/examples/dreambooth/requirements.txt index 2f7b5060971a..3f86855e1d1e 100644 --- a/examples/dreambooth/requirements.txt +++ b/examples/dreambooth/requirements.txt @@ -4,4 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 -peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file +peft==0.7.0 \ No newline at end of file diff --git a/examples/dreambooth/requirements_sdxl.txt b/examples/dreambooth/requirements_sdxl.txt index 2f7b5060971a..3f86855e1d1e 100644 --- a/examples/dreambooth/requirements_sdxl.txt +++ b/examples/dreambooth/requirements_sdxl.txt @@ -4,4 +4,4 @@ transformers>=4.25.1 ftfy tensorboard Jinja2 -peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file +peft==0.7.0 \ No newline at end of file diff --git a/examples/text_to_image/requirements.txt b/examples/text_to_image/requirements.txt index 9394ad3354c6..0dd164fc2035 100644 --- a/examples/text_to_image/requirements.txt +++ b/examples/text_to_image/requirements.txt @@ -5,4 +5,4 @@ datasets ftfy 
tensorboard Jinja2 -peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file +peft==0.7.0 \ No newline at end of file diff --git a/examples/text_to_image/requirements_sdxl.txt b/examples/text_to_image/requirements_sdxl.txt index 476e1d873d27..64cbc9205fd0 100644 --- a/examples/text_to_image/requirements_sdxl.txt +++ b/examples/text_to_image/requirements_sdxl.txt @@ -5,4 +5,4 @@ ftfy tensorboard Jinja2 datasets -peft @ git+https://github.com/huggingface/peft.git \ No newline at end of file +peft==0.7.0 \ No newline at end of file
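
Taken together, the patches above replace the hand-rolled `LoRAAttnProcessor`/`LoRALinearLayer` plumbing in the example scripts with PEFT adapters: the base model is frozen, a `LoraConfig` targeting the attention projections is attached with `add_adapter`, only the parameters left with `requires_grad=True` are optimized and clipped, and the trained weights are exported with `get_peft_model_state_dict` and saved through `save_lora_weights`. The sketch below condenses that flow outside the full training scripts; the model ID, rank, learning rate, and output directory are illustrative placeholders, and the denoising training loop itself is omitted.

# Minimal sketch of the PEFT-backed LoRA flow these patches introduce.
# MODEL_ID, RANK, OUTPUT_DIR, and the learning rate are placeholders,
# not values taken from the patches.
import torch
from peft import LoraConfig
from peft.utils import get_peft_model_state_dict

from diffusers import StableDiffusionPipeline, UNet2DConditionModel

MODEL_ID = "runwayml/stable-diffusion-v1-5"  # placeholder base model
RANK = 4                                     # placeholder LoRA rank
OUTPUT_DIR = "sd-lora-output"                # placeholder save directory

# 1. Load the UNet and freeze all of its base parameters.
unet = UNet2DConditionModel.from_pretrained(MODEL_ID, subfolder="unet")
for param in unet.parameters():
    param.requires_grad_(False)

# 2. Attach a LoRA adapter to the attention projections, as the patches do.
unet_lora_config = LoraConfig(
    r=RANK,
    init_lora_weights="gaussian",
    target_modules=["to_k", "to_q", "to_v", "to_out.0"],
)
unet.add_adapter(unet_lora_config)

# 3. Optimize only the parameters the adapter made trainable.
lora_layers = list(filter(lambda p: p.requires_grad, unet.parameters()))
optimizer = torch.optim.AdamW(lora_layers, lr=1e-4)

# ... the usual denoising training loop goes here (omitted in this sketch);
# gradient clipping would be applied to the same `lora_layers` list.

# 4. Export only the LoRA weights and save them in the pipeline format.
unet_lora_state_dict = get_peft_model_state_dict(unet)
StableDiffusionPipeline.save_lora_weights(
    save_directory=OUTPUT_DIR,
    unet_lora_layers=unet_lora_state_dict,
    safe_serialization=True,
)

# 5. At inference time the saved weights load back through the pipeline.
pipe = StableDiffusionPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.float16)
pipe.load_lora_weights(OUTPUT_DIR)

Compared with the removed per-module `set_lora_layer` calls, this leaves the example scripts with only a rank and a list of target module names to manage, while PEFT owns the adapter bookkeeping and state-dict extraction.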