From c34099d8f97b6cae14ffb223d2dbd06abfdd6d3f Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Mon, 24 Apr 2023 14:50:04 +0800
Subject: [PATCH 1/7] try to fix Zero3 Memory Leak

---
 src/peft/tuners/adalora.py | 2 +-
 src/peft/tuners/lora.py    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/peft/tuners/adalora.py b/src/peft/tuners/adalora.py
index dded9c659f..a89c288b30 100644
--- a/src/peft/tuners/adalora.py
+++ b/src/peft/tuners/adalora.py
@@ -431,7 +431,7 @@ def forward(self, x: torch.Tensor):
                 self.unmerge()
             result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) + self.bias
             result += (
                 (
                     self.lora_dropout[self.active_adapter](x)
diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index 57a7026e10..bed4b84776 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -490,7 +490,7 @@ def forward(self, x: torch.Tensor):
                 self.unmerge()
             result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) + self.bias
 
             x = x.to(self.lora_A[self.active_adapter].weight.dtype)

From faeda26cca5dcc1f23a61812dbf83e755c5dd1a3 Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Mon, 24 Apr 2023 14:55:43 +0800
Subject: [PATCH 2/7] Update README.md

---
 README.md | 2 --
 1 file changed, 2 deletions(-)

diff --git a/README.md b/README.md
index 6a0d89c1f9..94d52e8663 100644
--- a/README.md
+++ b/README.md
@@ -358,8 +358,6 @@ any GPU memory savings. Please refer issue [[FSDP] FSDP with CPU offload consume
 `P_TUNING`/`PROMPT_TUNING` appends soft prompt embeddings to `input_embeds` to create
 new `input_embeds` to be given to the model. Therefore, `generate` doesn't support this yet.
-4. When using ZeRO3 with zero3_init_flag=True, if you find the gpu memory increase with training steps. we might need to set zero3_init_flag=false in accelerate config.yaml. The related issue is [[BUG] memory leak under zero.Init](https://github.com/microsoft/DeepSpeed/issues/2637)
-
 
 ## Backlog:
 - [x] Add tests
 - [x] Multi Adapter training and inference support

From 26c0c6763436557088fe77a9ae96b00188689116 Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Mon, 24 Apr 2023 15:32:22 +0800
Subject: [PATCH 3/7] handle bias == None

---
 src/peft/tuners/adalora.py | 6 +++++-
 src/peft/tuners/lora.py    | 5 ++++-
 2 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/src/peft/tuners/adalora.py b/src/peft/tuners/adalora.py
index a89c288b30..1645cc7dff 100644
--- a/src/peft/tuners/adalora.py
+++ b/src/peft/tuners/adalora.py
@@ -431,7 +431,11 @@ def forward(self, x: torch.Tensor):
                 self.unmerge()
             result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) + self.bias
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
+
             result += (
                 (
                     self.lora_dropout[self.active_adapter](x)
diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index bed4b84776..be9a166e63 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -490,7 +490,10 @@ def forward(self, x: torch.Tensor):
                 self.unmerge()
             result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) + self.bias
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
 
             x = x.to(self.lora_A[self.active_adapter].weight.dtype)

From 1928d98e52ed313cadb455a361a23702f0805a60 Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Fri, 28 Apr 2023 10:32:56 +0800
Subject: [PATCH 4/7] replace all F.linear with torch.matmul and fix not
 fan_in_fan_out bug

---
 src/peft/tuners/adalora.py | 18 +++++++++++++++---
 src/peft/tuners/lora.py    | 19 +++++++++++++++----
 2 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/src/peft/tuners/adalora.py b/src/peft/tuners/adalora.py
index 1645cc7dff..d2f77f6046 100644
--- a/src/peft/tuners/adalora.py
+++ b/src/peft/tuners/adalora.py
@@ -425,11 +425,19 @@ def unmerge(self):
 
     def forward(self, x: torch.Tensor):
         if self.active_adapter not in self.lora_A.keys():
-            return F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
+            return result
+
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
         elif self.r[self.active_adapter] > 0 and not self.merged:
             result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
@@ -446,7 +454,11 @@ def forward(self, x: torch.Tensor):
                 / (self.ranknum[self.active_adapter] + 1e-5)
             )
         else:
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
+
         return result
diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index be9a166e63..19a0fa41ab 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -484,13 +484,21 @@ def forward(self, x: torch.Tensor):
         previous_dtype = x.dtype
 
         if self.active_adapter not in self.lora_A.keys():
-            return F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
+            return result
+
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
@@ -504,7 +512,10 @@ def forward(self, x: torch.Tensor):
                 * self.scaling[self.active_adapter]
             )
         else:
-            result = F.linear(x, transpose(self.weight, self.fan_in_fan_out), bias=self.bias)
+            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+
+            if self.bias:
+                result += self.bias
 
         result = result.to(previous_dtype)

From 585cebb82b48f0546e6de319756a3616ff201fe4 Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Fri, 28 Apr 2023 10:36:24 +0800
Subject: [PATCH 5/7] add not back according to @tohtana's idea.

---
 src/peft/tuners/adalora.py | 6 +++---
 src/peft/tuners/lora.py    | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/src/peft/tuners/adalora.py b/src/peft/tuners/adalora.py
index d2f77f6046..cea8fa5a89 100644
--- a/src/peft/tuners/adalora.py
+++ b/src/peft/tuners/adalora.py
@@ -425,7 +425,7 @@ def unmerge(self):
 
     def forward(self, x: torch.Tensor):
         if self.active_adapter not in self.lora_A.keys():
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
@@ -434,7 +434,7 @@ def forward(self, x: torch.Tensor):
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
@@ -454,7 +454,7 @@ def forward(self, x: torch.Tensor):
                 / (self.ranknum[self.active_adapter] + 1e-5)
             )
         else:
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index 19a0fa41ab..2991692713 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -484,7 +484,7 @@ def forward(self, x: torch.Tensor):
         previous_dtype = x.dtype
 
         if self.active_adapter not in self.lora_A.keys():
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
@@ -493,12 +493,12 @@ def forward(self, x: torch.Tensor):
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias
@@ -512,7 +512,7 @@ def forward(self, x: torch.Tensor):
                 * self.scaling[self.active_adapter]
             )
         else:
-            result = torch.matmul(x, transpose(self.weight, self.fan_in_fan_out))
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
 
             if self.bias:
                 result += self.bias

From 0dde8068f23599d5861f824f5fc437a4b6db259f Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Fri, 5 May 2023 01:18:33 +0000
Subject: [PATCH 6/7] fix style and quality issues

---
 src/peft/tuners/adalora.py | 17 ++++++++---------
 src/peft/tuners/lora.py    | 17 ++++++++---------
 2 files changed, 16 insertions(+), 18 deletions(-)

diff --git a/src/peft/tuners/adalora.py b/src/peft/tuners/adalora.py
index cea8fa5a89..4d498d4a15 100644
--- a/src/peft/tuners/adalora.py
+++ b/src/peft/tuners/adalora.py
@@ -6,7 +6,6 @@
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from transformers.pytorch_utils import Conv1D
 
 from ..utils import (
@@ -425,8 +424,8 @@ def unmerge(self):
 
     def forward(self, x: torch.Tensor):
         if self.active_adapter not in self.lora_A.keys():
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
             return result
@@ -434,16 +433,16 @@ def forward(self, x: torch.Tensor):
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
         elif self.r[self.active_adapter] > 0 and not self.merged:
             result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
-            
+
             if self.bias:
                 result += self.bias
-            
+
             result += (
                 (
                     self.lora_dropout[self.active_adapter](x)
@@ -454,8 +453,8 @@ def forward(self, x: torch.Tensor):
                 / (self.ranknum[self.active_adapter] + 1e-5)
             )
         else:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index 2991692713..cea781da5b 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -21,7 +21,6 @@
 
 import torch
 import torch.nn as nn
-import torch.nn.functional as F
 from transformers.pytorch_utils import Conv1D
 
 from ..import_utils import is_bnb_available
@@ -484,8 +483,8 @@ def forward(self, x: torch.Tensor):
         previous_dtype = x.dtype
 
         if self.active_adapter not in self.lora_A.keys():
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
             return result
@@ -493,13 +492,13 @@ def forward(self, x: torch.Tensor):
         if self.disable_adapters:
             if self.r[self.active_adapter] > 0 and self.merged:
                 self.unmerge()
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
         elif self.r[self.active_adapter] > 0 and not self.merged:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias
@@ -512,8 +511,8 @@ def forward(self, x: torch.Tensor):
                 * self.scaling[self.active_adapter]
             )
         else:
-            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out)) 
-            
+            result = torch.matmul(x, transpose(self.weight, not self.fan_in_fan_out))
+
             if self.bias:
                 result += self.bias

From f09e563671fbf99d1af71ba5f13533f92899c9fd Mon Sep 17 00:00:00 2001
From: dumpmemory <64742282+dumpmemory@users.noreply.github.com>
Date: Wed, 10 May 2023 06:20:49 +0000
Subject: [PATCH 7/7] merge main and update missing F

---
 src/peft/tuners/lora.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/peft/tuners/lora.py b/src/peft/tuners/lora.py
index fef958cb8a..572f4836a4 100644
--- a/src/peft/tuners/lora.py
+++ b/src/peft/tuners/lora.py
@@ -21,6 +21,7 @@
 
 import torch
 import torch.nn as nn
+import torch.nn.functional as F
 from transformers.pytorch_utils import Conv1D
 
 from ..import_utils import is_bnb_available
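
Note (not part of the patches above): the series rests on the identity that F.linear(x, transpose(weight, fan_in_fan_out), bias=bias) can be rewritten as torch.matmul(x, transpose(weight, not fan_in_fan_out)) plus an explicit bias add, which is what the commits converge on. The sketch below only demonstrates that numerical equivalence, not the DeepSpeed ZeRO-3 (zero.Init) memory behavior that motivated the change. It is a minimal standalone example: the local transpose helper is assumed to mirror peft's utility (transpose the weight only when fan_in_fan_out is set), linear_via_matmul and the tensor shapes are illustrative, and the bias is guarded with "is not None" rather than by truth-testing the tensor.

import torch
import torch.nn.functional as F


def transpose(weight: torch.Tensor, fan_in_fan_out: bool) -> torch.Tensor:
    # Assumed to mirror peft's helper: transpose only when the weight is stored
    # in (in_features, out_features) layout.
    return weight.T if fan_in_fan_out else weight


def linear_via_matmul(x, weight, bias, fan_in_fan_out):
    # The matmul form used by the patches; note the flipped flag passed to transpose().
    result = torch.matmul(x, transpose(weight, not fan_in_fan_out))
    if bias is not None:  # guard with `is not None` instead of truth-testing the tensor
        result += bias
    return result


if __name__ == "__main__":
    x = torch.randn(4, 16)
    bias = torch.randn(8)
    for fan_in_fan_out in (False, True):
        # (out_features, in_features) layout when False, (in_features, out_features) when True.
        weight = torch.randn(16, 8) if fan_in_fan_out else torch.randn(8, 16)
        expected = F.linear(x, transpose(weight, fan_in_fan_out), bias=bias)
        actual = linear_via_matmul(x, weight, bias, fan_in_fan_out)
        assert torch.allclose(expected, actual, atol=1e-6)
    print("torch.matmul form matches F.linear for both weight layouts")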