diff --git a/auto_gptq/nn_modules/qlinear/qlinear_hpu.py b/auto_gptq/nn_modules/qlinear/qlinear_hpu.py
index 0c998da2..757f3aaf 100644
--- a/auto_gptq/nn_modules/qlinear/qlinear_hpu.py
+++ b/auto_gptq/nn_modules/qlinear/qlinear_hpu.py
@@ -5,7 +5,19 @@
 import torch
 import torch.nn as nn
 import transformers
-import habana_frameworks.torch.core as htcore
+try:
+    import habana_frameworks.torch.core as htcore
+    convert_from_uint4 = torch.ops.hpu.convert_from_uint4
+except Exception as e:
+    hpu_import_exception = e
+
+    def error_raiser_hpu(*args, **kwargs):
+        raise ValueError(
+            f"Trying to use HPU, but could not import the HPU framework with the following error: {hpu_import_exception}"
+        )
+
+    convert_from_uint4 = error_raiser_hpu
+
 
 logger = getLogger(__name__)
 
@@ -118,7 +130,7 @@ def forward(self, x):
         scales = self.scales
         qweight = self.qweight
         zeros = self.qzeros
-        weight = torch.ops.hpu.convert_from_uint4(qweight, scales, zeros, x_dtype)
+        weight = convert_from_uint4(qweight, scales, zeros, x_dtype)
         out = torch.matmul(x, weight)
         out = out.reshape(out_shape)
         out = out + self.bias if self.bias is not None else out
diff --git a/auto_gptq/utils/import_utils.py b/auto_gptq/utils/import_utils.py
index bc27a994..0f0f1f58 100644
--- a/auto_gptq/utils/import_utils.py
+++ b/auto_gptq/utils/import_utils.py
@@ -69,7 +69,7 @@ def dynamically_import_QuantLinear(
 ):
     try:
         import habana_frameworks.torch.hpu  # noqa: F401
-    except ImportError as e:
+    except Exception as e:
         pass
     else:
         from ..nn_modules.qlinear.qlinear_hpu import QuantLinear
diff --git a/tests/test_hpu_linear.py b/tests/test_hpu_linear.py
index 0f141d01..62a37f5b 100644
--- a/tests/test_hpu_linear.py
+++ b/tests/test_hpu_linear.py
@@ -2,7 +2,10 @@
 import math
 import torch
 import pytest
-import habana_frameworks.torch.core as htcore
+try:
+    import habana_frameworks.torch.core as htcore
+except Exception as e:
+    pytestmark = pytest.mark.skip("Couldn't import HPU plugin, skipping HPU tests")
 
 def _convert_to_tensor_list(tensor_or_tensors):
     if isinstance(tensor_or_tensors, tuple):
diff --git a/tests/test_q4.py b/tests/test_q4.py
index a0383a78..b367eb5b 100644
--- a/tests/test_q4.py
+++ b/tests/test_q4.py
@@ -7,7 +7,6 @@
 from auto_gptq.nn_modules.qlinear.qlinear_marlin import QuantLinear as MarlinQuantLinear
 from auto_gptq.nn_modules.qlinear.qlinear_tritonv2 import QuantLinear as TritonV2QuantLinear
 from auto_gptq.utils.import_utils import dynamically_import_QuantLinear
-import habana_frameworks.torch.core as htcore
 
 
 try:
@@ -2205,6 +2204,11 @@ class TestQ4HPU(unittest.TestCase):
         ]
     )
     def test_generation(self, in_device, model_dtype):
+        try:
+            import habana_frameworks.torch.core as htcore
+        except Exception as e:
+            self.skipTest("Couldn't import HPU plugin, skipping HPU tests")
+
         # Reference generated with the cuda-old kernel and TheBloke/Llama-2-7B-Chat-GPTQ
         reference_output = " I am in Paris and I am feeling very sad and lonely. everybody I know is busy and I don't have any friends here. I am staying in a small apartment in the 11th arrondissement and I am feeling very isolated. I miss my friends and family back home and I don'"
 
@@ -2270,6 +2274,11 @@ def test_generation(self, in_device, model_dtype):
         ]
     )
     def test_bias(self, in_device, model_dtype):
+        try:
+            import habana_frameworks.torch.core as htcore
+        except Exception as e:
+            self.skipTest("Couldn't import HPU plugin, skipping HPU tests")
+
         device = torch.device(in_device)
         # TheBloke/Llama-2-7B-Chat-GPTQ has bias, but they are all zeros, use a checkpoint which really uses bias.
         model_id = "s3nh/starcoderbase-1b-GPTQ"