previous_dtype is now inferred from F.linear's result output type. #1010

Merged (16 commits) on Feb 19, 2024
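For context, the change replaces the cast back to the *input* dtype with a cast to the dtype the base layer actually produced, so LoRA layers keep the same output dtype as their base layers under torch.autocast. Below is a minimal standalone sketch of the difference (an illustration only, not the PEFT source; it assumes a CUDA device is available):

```python
import torch
import torch.nn as nn

# Standalone illustration of the old vs. new cast; not the PEFT code itself.
linear = nn.Linear(768, 256).cuda()
x = torch.randn(2, 768, device="cuda")  # float32 input

with torch.autocast(device_type="cuda", dtype=torch.bfloat16):
    result = linear(x)  # autocast runs the matmul in bfloat16

    # Old behaviour: cast back to the input dtype, silently undoing autocast.
    previous_dtype = x.dtype               # torch.float32
    old_output = result.to(previous_dtype)

    # New behaviour: keep whatever dtype the base layer actually produced.
    torch_result_dtype = result.dtype      # torch.bfloat16 under autocast
    new_output = result.to(torch_result_dtype)

print(old_output.dtype)  # torch.float32 -- what LoRA layers used to return
print(new_output.dtype)  # torch.bfloat16 -- matches a plain nn.Linear under autocast
```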
16 changes: 9 additions & 7 deletions src/peft/tuners/lora/layer.py
@@ -251,7 +251,8 @@ def __init__(
r: int = 0,
lora_alpha: int = 1,
lora_dropout: float = 0.0,
- fan_in_fan_out: bool = False,  # Set this to True if the layer to replace stores weight like (fan_in, fan_out)
+ fan_in_fan_out: bool = False,
+ # Set this to True if the layer to replace stores weight like (fan_in, fan_out)
Member: Can we undo this change, or at least move the comment above line 254, where it belongs?

is_target_conv_1d_layer: bool = False,
init_lora_weights: Union[bool, str] = True,
**kwargs,
@@ -352,8 +353,6 @@ def get_delta_weight(self, adapter) -> torch.Tensor:
return output_tensor

def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
- previous_dtype = x.dtype
-
if self.disable_adapters:
if self.merged:
self.unmerge()
@@ -362,6 +361,7 @@ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
result = self.base_layer(x, *args, **kwargs)
else:
result = self.base_layer(x, *args, **kwargs)
+ torch_result_dtype = result.dtype
for active_adapter in self.active_adapters:
if active_adapter not in self.lora_A.keys():
continue
@@ -372,7 +372,7 @@ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
x = x.to(lora_A.weight.dtype)
result += lora_B(lora_A(dropout(x))) * scaling

- result = result.to(previous_dtype)
+ result = result.to(torch_result_dtype)
return result

def __repr__(self) -> str:
@@ -507,6 +507,7 @@ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
result = self.base_layer(x, *args, **kwargs)
else:
result = self.base_layer(x, *args, **kwargs)
+ torch_result_dtype = result.dtype
for active_adapter in self.active_adapters:
if active_adapter not in self.lora_embedding_A:
continue
@@ -515,6 +516,7 @@ def forward(self, x: torch.Tensor, *args: Any, **kwargs: Any) -> torch.Tensor:
scaling = self.scaling[active_adapter]
after_A = self._embed(x, embedding_A)
result += (after_A @ embedding_B) * scaling
+ result = result.to(torch_result_dtype)

return result

@@ -642,8 +644,6 @@ def get_delta_weight(self, adapter) -> torch.Tensor:
return output_tensor

def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
- previous_dtype = x.dtype
-
if self.disable_adapters:
if self.merged:
self.unmerge()
@@ -652,6 +652,8 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
result = self.base_layer(x, *args, **kwargs)
else:
result = self.base_layer(x, *args, **kwargs)
+ torch_result_dtype = result.dtype
+
for active_adapter in self.active_adapters:
if active_adapter not in self.lora_A.keys():
continue
@@ -662,7 +664,7 @@ def forward(self, x: torch.Tensor, *args, **kwargs) -> torch.Tensor:
x = x.to(lora_A.weight.dtype)
result += lora_B(lora_A(dropout(x))) * scaling

- result = result.to(previous_dtype)
+ result = result.to(torch_result_dtype)
return result

def __repr__(self) -> str:
156 changes: 156 additions & 0 deletions tests/test_autocast_torchcompatibility_lora.py
@@ -0,0 +1,156 @@
import unittest
Member: Could you please add the comment header that we have in all our files? Also, please move this test to test_gpu_examples.py, otherwise it won't be run on CI (since our normal CI has no GPUs and only selected test files are run with GPU in our nightly tests).


import torch
import torch.nn as nn

from peft.tuners.lora import Conv2d as LoraConv2d
from peft.tuners.lora import Embedding as LoraEmbedding
from peft.tuners.lora import Linear as LoraLinear


class SimpleModel(nn.Module):
def __init__(self):
super(SimpleModel, self).__init__()
Member: Nit:

Suggested change:
- super(SimpleModel, self).__init__()
+ super().__init__()

Same for other classes.


self.embedding_layer = nn.Embedding(1000, 768)
self.layer_norm = nn.LayerNorm(768)
self.linear_transform = nn.Linear(768, 256)

def forward(self, input_ids):
embedded_output = self.embedding_layer(input_ids)
norm_output = self.layer_norm(embedded_output)
linear_output = self.linear_transform(norm_output)

return linear_output


class SimpleConv2DModel(nn.Module):
def __init__(self):
super(SimpleConv2DModel, self).__init__()

self.embedding_layer = nn.Embedding(1000, 768)
self.layer_norm = nn.LayerNorm(768)
self.conv2d_transform = nn.Conv2d(1, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))

def forward(self, input_ids):
# Additional layers for your custom model
embedded_output = self.embedding_layer(input_ids)
norm_output = self.layer_norm(embedded_output)

# Reshape for Conv2d input (add a channel dimension)
norm_output = norm_output.unsqueeze(1)
conv_output = self.conv2d_transform(norm_output)

# Squeeze the channel dimension back out (only has an effect if it is size 1)
conv_output = conv_output.squeeze(1)

return conv_output


class SimpleLorALinearModel(nn.Module):
"""Same as SimpleModel but wraps Linear in Lora layer"""

def __init__(self):
super(SimpleLorALinearModel, self).__init__()

self.embedding_layer = nn.Embedding(1000, 768)
self.layer_norm = nn.LayerNorm(768)
self.linear_transform_base = nn.Linear(768, 256)
self.linear_transform = LoraLinear(
Member: Instead of wrapping the layer explicitly, let's create a LoraConfig and call get_peft_model with SimpleModel as input. test_simple_lora_linear_model could be responsible for initializing the class and passing the instance to _test_model. This way, we can be 100% sure that this generates a model the same way that users typically do (see the sketch after this class).

self.linear_transform_base, adapter_name="test_linear", r=8, lora_alpha=16, lora_dropout=0.05
)

def forward(self, input_ids):
embedded_output = self.embedding_layer(input_ids)
norm_output = self.layer_norm(embedded_output)
linear_output = self.linear_transform(norm_output)

return linear_output
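
For reference, the reviewer's suggestion amounts to roughly the following shape (a sketch only; the target_modules value and the helper name are illustrative assumptions, not part of this PR):

```python
# Sketch of the reviewer's suggestion: build the LoRA-wrapped model through the
# public API instead of instantiating LoraLinear by hand.
from peft import LoraConfig, get_peft_model

def build_lora_linear_model():
    base_model = SimpleModel()
    config = LoraConfig(
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=["linear_transform"],  # the nn.Linear defined in SimpleModel
    )
    # get_peft_model injects LoRA layers the same way end users obtain them.
    return get_peft_model(base_model, config)
```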


class SimpleLorAEmbeddingModel(nn.Module):
"""Same as SimpleModel but wraps Embedding in Lora layer"""

def __init__(self):
super(SimpleLorAEmbeddingModel, self).__init__()

self.embedding_layer_base = nn.Embedding(1000, 768)
self.embedding_layer = LoraEmbedding(
Member: Same argument as above.

self.embedding_layer_base, adapter_name="test_embedding", r=8, lora_alpha=16, lora_dropout=0.05
)
self.layer_norm = nn.LayerNorm(768)
self.linear_transform = nn.Linear(768, 256)

def forward(self, input_ids):
embedded_output = self.embedding_layer(input_ids)
norm_output = self.layer_norm(embedded_output)
linear_output = self.linear_transform(norm_output)

return linear_output


class SimpleLorAConv2DModel(nn.Module):
"""Same as SimpleModel but wraps Conv2D in Lora layer"""

def __init__(self):
super(SimpleLorAConv2DModel, self).__init__()

self.embedding_layer = nn.Embedding(1000, 768)
self.layer_norm = nn.LayerNorm(768)
self.conv2d_transform_base = nn.Conv2d(1, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
self.conv2d_transform = LoraConv2d(
Member: Same argument as above.

self.conv2d_transform_base, adapter_name="test_conv2d", r=8, lora_alpha=16, lora_dropout=0.05
)

def forward(self, input_ids):
# Additional layers for your custom model
embedded_output = self.embedding_layer(input_ids)
norm_output = self.layer_norm(embedded_output)

# Reshape for Conv2d input (add a channel dimension)
norm_output = norm_output.unsqueeze(1)
conv_output = self.conv2d_transform(norm_output)

# Squeeze the channel dimension back out (only has an effect if it is size 1)
conv_output = conv_output.squeeze(1)

return conv_output


class TestAutoCast(unittest.TestCase):
Contributor: This test seems to require a GPU; can you add the require_torch_gpu decorator here?

    def require_torch_gpu(test_case):

Contributor Author: Done.

def test_simple_model(self):
Member: WDYT about parametrizing the test (using parameterized, see other tests) over the dtype? That way, we can run a single test per test case, which is usually preferable. (A sketch of this shape follows at the end of the file.)

self._test_model(SimpleModel)

def test_simple_conv2d_model(self):
self._test_model(SimpleConv2DModel)

def test_simple_lora_linear_model(self):
self._test_model(SimpleLorALinearModel)

def test_simple_lora_embedding_model(self):
self._test_model(SimpleLorAEmbeddingModel)

def test_simple_lora_conv2d_model(self):
self._test_model(SimpleLorAConv2DModel)

def _test_model(self, model_class):
# Instantiate the model
model = model_class().cuda()

# Prepare dummy inputs
input_ids = torch.randint(0, 1000, (2, 10)).cuda()

# Forward pass with torch.bfloat16
Member: For the bf16 case, can we please run self.skipTest if not torch.cuda.is_bf16_supported()?

with torch.autocast(enabled=True, dtype=torch.bfloat16, device_type="cuda"):
outputs = model(input_ids)
self.assertEqual(outputs.dtype, torch.bfloat16)

# Forward pass with torch.float32
with torch.autocast(enabled=True, dtype=torch.float32, device_type="cuda"):
outputs = model(input_ids)
self.assertEqual(outputs.dtype, torch.float32)

# Forward pass with torch.float16
with torch.autocast(enabled=True, dtype=torch.float16, device_type="cuda"):
outputs = model(input_ids)
self.assertEqual(outputs.dtype, torch.float16)
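
Putting the review comments together, the test could take roughly the following shape (a sketch only; the import path for require_torch_gpu is an assumption based on where the repo's test helpers usually live, and parameterized is the package the other tests already use):

```python
import unittest

import torch
from parameterized import parameterized

# Assumed import path; in the peft repo the decorator lives with the other
# test helpers (e.g. tests/testing_utils.py).
from testing_utils import require_torch_gpu


@require_torch_gpu
class TestAutoCast(unittest.TestCase):
    @parameterized.expand([torch.bfloat16, torch.float32, torch.float16])
    def test_simple_model(self, dtype):
        self._test_model(SimpleModel, dtype)

    def _test_model(self, model_class, dtype):
        # Skip bf16 on GPUs that do not support it, as requested in review.
        if dtype == torch.bfloat16 and not torch.cuda.is_bf16_supported():
            self.skipTest("bfloat16 is not supported on this GPU")

        model = model_class().cuda()
        input_ids = torch.randint(0, 1000, (2, 10)).cuda()

        # One forward pass per parametrized dtype instead of three in one test.
        with torch.autocast(enabled=True, dtype=dtype, device_type="cuda"):
            outputs = model(input_ids)
            self.assertEqual(outputs.dtype, dtype)
```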