Fix upstream regression when there's no HPU device (AutoGPTQ#701)
HolyFalafel authored Jun 29, 2024
1 parent 8d8e0ea commit 52eaddd
Showing 4 changed files with 29 additions and 5 deletions.
16 changes: 14 additions & 2 deletions auto_gptq/nn_modules/qlinear/qlinear_hpu.py
@@ -5,7 +5,19 @@
 import torch
 import torch.nn as nn
 import transformers
-import habana_frameworks.torch.core as htcore
+try:
+    import habana_frameworks.torch.core as htcore
+    convert_from_uint4 = torch.ops.hpu.convert_from_uint4
+except Exception as e:
+    hpu_import_exception = e
+
+    def error_raiser_hpu(*args, **kwargs):
+        raise ValueError(
+            f"Trying to use HPU, but could not import the HPU framework with the following error: {hpu_import_exception}"
+        )
+
+    convert_from_uint4 = error_raiser_hpu
 
 
 logger = getLogger(__name__)

@@ -118,7 +130,7 @@ def forward(self, x):
         scales = self.scales
         qweight = self.qweight
         zeros = self.qzeros
-        weight = torch.ops.hpu.convert_from_uint4(qweight, scales, zeros, x_dtype)
+        weight = convert_from_uint4(qweight, scales, zeros, x_dtype)
         out = torch.matmul(x, weight)
         out = out.reshape(out_shape)
         out = out + self.bias if self.bias is not None else out
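
For reference, a minimal self-contained sketch of the guarded-import-with-error-raiser pattern used above; the module and function names below are illustrative placeholders, not part of this commit:

try:
    import some_optional_backend  # hypothetical optional dependency
    fast_op = some_optional_backend.fast_op
except Exception as e:
    _import_exception = e

    def _error_raiser(*args, **kwargs):
        raise ValueError(
            f"Tried to use the optional backend, but importing it failed with: {_import_exception}"
        )

    fast_op = _error_raiser

# Callers can reference fast_op unconditionally; the failure only surfaces
# if the missing backend is actually invoked.
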
2 changes: 1 addition & 1 deletion auto_gptq/utils/import_utils.py
@@ -69,7 +69,7 @@ def dynamically_import_QuantLinear(
 ):
     try:
         import habana_frameworks.torch.hpu  # noqa: F401
-    except ImportError as e:
+    except Exception as e:
         pass
     else:
         from ..nn_modules.qlinear.qlinear_hpu import QuantLinear
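
Presumably the regression being fixed: importing habana_frameworks can raise errors other than ImportError when no HPU device is present, so the narrow except let that failure propagate and break non-HPU setups. A minimal sketch of the same probe-and-fall-back idea, with an illustrative helper name that is not part of the project's API:

def hpu_is_available() -> bool:
    try:
        import habana_frameworks.torch.hpu  # noqa: F401
    except Exception:
        # Any failure (missing package, missing device, initialization error)
        # means the HPU path should not be selected.
        return False
    return True
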
5 changes: 4 additions & 1 deletion tests/test_hpu_linear.py
@@ -2,7 +2,10 @@
 import math
 import torch
 import pytest
-import habana_frameworks.torch.core as htcore
+try:
+    import habana_frameworks.torch.core as htcore
+except Exception as e:
+    pytestmark = pytest.mark.skip("Couldn't import HPU plugin, skipping HPU tests")
 
 def _convert_to_tensor_list(tensor_or_tensors):
     if isinstance(tensor_or_tensors, tuple):
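
The same skip-instead-of-fail idea as a standalone pytest module; the plugin name below is a hypothetical placeholder:

import pytest

try:
    import some_hardware_plugin  # hypothetical optional dependency
except Exception:
    # Skipping the whole module keeps collection working on machines
    # without the optional hardware stack.
    pytestmark = pytest.mark.skip("Couldn't import the hardware plugin")


def test_plugin_loaded():
    assert some_hardware_plugin is not None
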
11 changes: 10 additions & 1 deletion tests/test_q4.py
@@ -7,7 +7,6 @@
 from auto_gptq.nn_modules.qlinear.qlinear_marlin import QuantLinear as MarlinQuantLinear
 from auto_gptq.nn_modules.qlinear.qlinear_tritonv2 import QuantLinear as TritonV2QuantLinear
 from auto_gptq.utils.import_utils import dynamically_import_QuantLinear
-import habana_frameworks.torch.core as htcore
 
 
 try:
@@ -2205,6 +2204,11 @@ class TestQ4HPU(unittest.TestCase):
         ]
     )
     def test_generation(self, in_device, model_dtype):
+        try:
+            import habana_frameworks.torch.core as htcore
+        except Exception as e:
+            self.skipTest("Couldn't import HPU plugin, skipping HPU tests")
+
         # Reference generated with the cuda-old kernel and TheBloke/Llama-2-7B-Chat-GPTQ
         reference_output = "<s> I am in Paris and I am feeling very sad and lonely. everybody I know is busy and I don't have any friends here. I am staying in a small apartment in the 11th arrondissement and I am feeling very isolated. I miss my friends and family back home and I don'"
 
@@ -2270,6 +2274,11 @@ def test_generation(self, in_device, model_dtype):
         ]
     )
     def test_bias(self, in_device, model_dtype):
+        try:
+            import habana_frameworks.torch.core as htcore
+        except Exception as e:
+            self.skipTest("Couldn't import HPU plugin, skipping HPU tests")
+
         device = torch.device(in_device)
         # TheBloke/Llama-2-7B-Chat-GPTQ has bias, but they are all zeros, use a checkpoint which really uses bias.
         model_id = "s3nh/starcoderbase-1b-GPTQ"
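
The per-test variant used in tests/test_q4.py, sketched as a standalone unittest case with illustrative names:

import unittest


class OptionalHPUTest(unittest.TestCase):
    def test_runs_only_with_hpu(self):
        try:
            import habana_frameworks.torch.core as htcore  # noqa: F401
        except Exception:
            self.skipTest("Couldn't import HPU plugin, skipping HPU tests")
        # The body below executes only when the HPU stack imported cleanly.
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()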
