From c34099d8f97b6cae14ffb223d2dbd06abfdd6d3f Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Mon, 24 Apr 2023 14:50:04 +0800
Subject: [PATCH 1/7] try to fix Zero3 Memory Leak

---
 src/peft/tuners/adalora.py | 2 +-
 src/peft/tuners/lora.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/peft/tuners/adalora.py b/src/peft/tuners/adalora.py
index dded9c659f..a89c288b30 100644
--- a/src/peft/tuners/adalora.py
+++ b/src/peft/tuners/adalora.py
@@ -431,7 +431,7 @@ def forward(self, x: torch.Tensor):
                 self.unmerge()
             result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) + self.bias
             result += (
                 (
                     self.lora_dropout[self.active_adapter](x)
diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index 57a7026e10..bed4b84776 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -490,7 +490,7 @@ def forward(self, x: torch.Tensor):
                 self.unmerge()
             result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) + self.bias
 
             x = x.to(self.lora_A[self.active_adapter].weight.dtype)

From faeda26cca5dcc1f23a61812dbf83e755c5dd1a3 Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Mon, 24 Apr 2023 14:55:43 +0800
Subject: [PATCH 2/7] Update README.md

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 6a0d89c1f9..94d52e8663 100644
--- a/README.md
+++ b/README.md
@@ -358,8 +358,6 @@ any GPU memory savings. Please refer issue [[FSDP] FSDP with CPU offload consume
 `P_TUNING`/`PROMPT_TUNING` appends soft prompt embeddings to `input_embeds` to create
 new `input_embeds` to be given to the model. Therefore, `generate` doesn't support this yet.
-4. When using ZeRO3 with zero3_init_flag=True, if you find the gpu memory increase with training steps. we might need to set zero3_init_flag=false in accelerate config.yaml. The related issue is [[BUG] memory leak under zero.Init](https://github.com/microsoft/DeepSpeed/issues/2637)
-
 
 ## Backlog:
 - [x] Add tests
 - [x] Multi Adapter training and inference support

From 26c0c6763436557088fe77a9ae96b00188689116 Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Mon, 24 Apr 2023 15:32:22 +0800
Subject: [PATCH 3/7] handle bias == None

---
 src/peft/tuners/adalora.py | 6 +++++-
 src/peft/tuners/lora.py    | 5 ++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/peft/tuners/adalora.py b/src/peft/tuners/adalora.py
index a89c288b30..1645cc7dff 100644
--- a/src/peft/tuners/adalora.py
+++ b/src/peft/tuners/adalora.py
@@ -431,7 +431,11 @@ def forward(self, x: torch.Tensor):
                 self.unmerge()
             result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) + self.bias
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
+
             result += (
                 (
                     self.lora_dropout[self.active_adapter](x)
diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index bed4b84776..be9a166e63 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -490,7 +490,10 @@ def forward(self, x: torch.Tensor):
                 self.unmerge()
             result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) + self.bias
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
 
             x = x.to(self.lora_A[self.active_adapter].weight.dtype)

From 1928d98e52ed313cadb455a361a23702f0805a60 Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Fri, 28 Apr 2023 10:32:56 +0800
Subject: [PATCH 4/7] replace all F.linear with torch.matmul and fix not
 fan_in_fan_out bug

---
 src/peft/tuners/adalora.py | 18 +++++++++++++++---
 src/peft/tuners/lora.py    | 19 +++++++++++++++----
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/src/peft/tuners/adalora.py b/src/peft/tuners/adalora.py
index 1645cc7dff..d2f77f6046 100644
--- a/src/peft/tuners/adalora.py
+++ b/src/peft/tuners/adalora.py
@@ -425,11 +425,19 @@ def unmerge(self):
 
     def forward(self, x: torch.Tensor):
         if self.active_adapter not in self.lora_A.keys():
-            return F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
+            return result
+
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
         elif self.r[self.active_adapter] > 0 and not self.merged:
             result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
@@ -446,7 +454,11 @@ def forward(self, x: torch.Tensor):
                 / (self.ranknum[self.active_adapter] + 1e-5)
             )
         else:
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
+
         return result
diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index be9a166e63..19a0fa41ab 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -484,13 +484,21 @@ def forward(self, x: torch.Tensor):
         previous_dtype = x.dtype
 
         if self.active_adapter not in self.lora_A.keys():
-            return F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
+            return result
+
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
@@ -504,7 +512,10 @@ def forward(self, x: torch.Tensor):
                 * self.scaling[self.active_adapter]
             )
         else:
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
 
         result = result.to(previous_dtype)

From 585cebb82b48f0546e6de319756a3616ff201fe4 Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Fri, 28 Apr 2023 10:36:24 +0800
Subject: [PATCH 5/7] add not back according to @tohtana's idea.

---
 src/peft/tuners/adalora.py | 6 +++---
 src/peft/tuners/lora.py    | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/peft/tuners/adalora.py b/src/peft/tuners/adalora.py
index d2f77f6046..cea8fa5a89 100644
--- a/src/peft/tuners/adalora.py
+++ b/src/peft/tuners/adalora.py
@@ -425,7 +425,7 @@ def unmerge(self):
 
     def forward(self, x: torch.Tensor):
         if self.active_adapter not in self.lora_A.keys():
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
@@ -434,7 +434,7 @@ def forward(self, x: torch.Tensor):
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
@@ -454,7 +454,7 @@ def forward(self, x: torch.Tensor):
                 / (self.ranknum[self.active_adapter] + 1e-5)
             )
         else:
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index 19a0fa41ab..2991692713 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -484,7 +484,7 @@ def forward(self, x: torch.Tensor):
         previous_dtype = x.dtype
 
         if self.active_adapter not in self.lora_A.keys():
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
@@ -493,12 +493,12 @@ def forward(self, x: torch.Tensor):
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
@@ -512,7 +512,7 @@ def forward(self, x: torch.Tensor):
                 * self.scaling[self.active_adapter]
             )
         else:
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias

From 0dde8068f23599d5861f824f5fc437a4b6db259f Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Fri, 5 May 2023 01:18:33 +0000
Subject: [PATCH 6/7] fix style and quality issues

---
 src/peft/tuners/adalora.py | 17 ++++++++---------
 src/peft/tuners/lora.py    | 17 ++++++++---------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/peft/tuners/adalora.py b/src/peft/tuners/adalora.py
index cea8fa5a89..4d498d4a15 100644
--- a/src/peft/tuners/adalora.py
+++ b/src/peft/tuners/adalora.py
@@ -6,7 +6,6 @@
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from transformers.pytorch_utils import Conv1D
 
 from ..utils import (
@@ -425,8 +424,8 @@ def unmerge(self):
 
     def forward(self, x: torch.Tensor):
         if self.active_adapter not in self.lora_A.keys():
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
             return result
@@ -434,16 +433,16 @@ def forward(self, x: torch.Tensor):
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
         elif self.r[self.active_adapter] > 0 and not self.merged:
             result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
-            
+
             if self.bias:
                 result += self.bias
-            
+
             result += (
                 (
                     self.lora_dropout[self.active_adapter](x)
@@ -454,8 +453,8 @@ def forward(self, x: torch.Tensor):
                 / (self.ranknum[self.active_adapter] + 1e-5)
             )
         else:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index 2991692713..cea781da5b 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -21,7 +21,6 @@
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from transformers.pytorch_utils import Conv1D
 
 from ..import_utils import is_bnb_available
@@ -484,8 +483,8 @@ def forward(self, x: torch.Tensor):
         previous_dtype = x.dtype
 
         if self.active_adapter not in self.lora_A.keys():
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
             return result
@@ -493,13 +492,13 @@ def forward(self, x: torch.Tensor):
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
@@ -512,8 +511,8 @@ def forward(self, x: torch.Tensor):
                 * self.scaling[self.active_adapter]
             )
         else:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias

From f09e563671fbf99d1af71ba5f13533f92899c9fd Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Wed, 10 May 2023 06:20:49 +0000
Subject: [PATCH 7/7] merge main and update missing F

---
 src/peft/tuners/lora.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index fef958cb8a..572f4836a4 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -21,6 +21,7 @@
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 from transformers.pytorch_utils import Conv1D
 
 from ..import_utils import is_bnb_available
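
Note (not part of the patches above): the series rests on the identity that F.linear(x, transpose(weight, fan_in_fan_out), bias=bias) can be rewritten as torch.matmul(x, transpose(weight, not fan_in_fan_out)) plus an explicit bias add, which is what the commits converge on. The sketch below only demonstrates that numerical equivalence, not the DeepSpeed ZeRO-3 (zero.Init) memory behavior that motivated the change. It is a minimal standalone example: the local transpose helper is assumed to mirror peft's utility (transpose the weight only when fan_in_fan_out is set), linear_via_matmul and the tensor shapes are illustrative, and the bias is guarded with "is not None" rather than by truth-testing the tensor.

import torch
import torch.nn.functional as F


def transpose(weight: torch.Tensor, fan_in_fan_out: bool) -> torch.Tensor:
    # Assumed to mirror peft's helper: transpose only when the weight is stored
    # in (in_features, out_features) layout.
    return weight.T if fan_in_fan_out else weight


def linear_via_matmul(x, weight, bias, fan_in_fan_out):
    # The matmul form used by the patches; note the flipped flag passed to transpose().
    result = torch.matmul(x, transpose(weight, not fan_in_fan_out))
    if bias is not None:  # guard with `is not None` instead of truth-testing the tensor
        result += bias
    return result


if __name__ == "__main__":
    x = torch.randn(4, 16)
    bias = torch.randn(8)
    for fan_in_fan_out in (False, True):
        # (out_features, in_features) layout when False, (in_features, out_features) when True.
        weight = torch.randn(16, 8) if fan_in_fan_out else torch.randn(8, 16)
        expected = F.linear(x, transpose(weight, fan_in_fan_out), bias=bias)
        actual = linear_via_matmul(x, weight, bias, fan_in_fan_out)
        assert torch.allclose(expected, actual, atol=1e-6)
    print("torch.matmul form matches F.linear for both weight layouts")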