Fix upstream regression when there's no HPU device (AutoGPTQ#701)
HolyFalafel authored Jun 29, 2024
1 parent 8d8e0ea commit 52eaddd
Showing 4 changed files with 29 additions and 5 deletions.
16 changes: 14 additions & 2 deletions auto_gptq/nn_modules/qlinear/qlinear_hpu.py
@@ -5,7 +5,19 @@
 import torch
 import torch.nn as nn
 import transformers
-import habana_frameworks.torch.core as htcore
+try:
+    import habana_frameworks.torch.core as htcore
+    convert_from_uint4 = torch.ops.hpu.convert_from_uint4
+except Exception as e:
+    hpu_import_exception = e
+
+    def error_raiser_hpu(*args, **kwargs):
+        raise ValueError(
+            f"Trying to use HPU, but could not import the HPU framework with the following error: {hpu_import_exception}"
+        )
+
+    convert_from_uint4 = error_raiser_hpu
 
 
 logger = getLogger(__name__)

@@ -118,7 +130,7 @@ def forward(self, x):
         scales = self.scales
         qweight = self.qweight
         zeros = self.qzeros
-        weight = torch.ops.hpu.convert_from_uint4(qweight, scales, zeros, x_dtype)
+        weight = convert_from_uint4(qweight, scales, zeros, x_dtype)
         out = torch.matmul(x, weight)
         out = out.reshape(out_shape)
         out = out + self.bias if self.bias is not None else out
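
For reference, a minimal self-contained sketch of the guarded-import-with-error-raiser pattern used above; the module and function names below are illustrative placeholders, not part of this commit:

try:
    import some_optional_backend  # hypothetical optional dependency
    fast_op = some_optional_backend.fast_op
except Exception as e:
    _import_exception = e

    def _error_raiser(*args, **kwargs):
        raise ValueError(
            f"Tried to use the optional backend, but importing it failed with: {_import_exception}"
        )

    fast_op = _error_raiser

# Callers can reference fast_op unconditionally; the failure only surfaces
# if the missing backend is actually invoked.
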
2 changes: 1 addition & 1 deletion auto_gptq/utils/import_utils.py
@@ -69,7 +69,7 @@ def dynamically_import_QuantLinear(
 ):
     try:
         import habana_frameworks.torch.hpu  # noqa: F401
-    except ImportError as e:
+    except Exception as e:
         pass
     else:
         from ..nn_modules.qlinear.qlinear_hpu import QuantLinear
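
Presumably the regression being fixed: importing habana_frameworks can raise errors other than ImportError when no HPU device is present, so the narrow except let that failure propagate and break non-HPU setups. A minimal sketch of the same probe-and-fall-back idea, with an illustrative helper name that is not part of the project's API:

def hpu_is_available() -> bool:
    try:
        import habana_frameworks.torch.hpu  # noqa: F401
    except Exception:
        # Any failure (missing package, missing device, initialization error)
        # means the HPU path should not be selected.
        return False
    return True
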
5 changes: 4 additions & 1 deletion tests/test_hpu_linear.py
@@ -2,7 +2,10 @@
 import math
 import torch
 import pytest
-import habana_frameworks.torch.core as htcore
+try:
+    import habana_frameworks.torch.core as htcore
+except Exception as e:
+    pytestmark = pytest.mark.skip("Couldn't import HPU plugin, skipping HPU tests")
 
 def _convert_to_tensor_list(tensor_or_tensors):
     if isinstance(tensor_or_tensors, tuple):
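
The same skip-instead-of-fail idea as a standalone pytest module; the plugin name below is a hypothetical placeholder:

import pytest

try:
    import some_hardware_plugin  # hypothetical optional dependency
except Exception:
    # Skipping the whole module keeps collection working on machines
    # without the optional hardware stack.
    pytestmark = pytest.mark.skip("Couldn't import the hardware plugin")


def test_plugin_loaded():
    assert some_hardware_plugin is not None
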
11 changes: 10 additions & 1 deletion tests/test_q4.py
@@ -7,7 +7,6 @@
 from auto_gptq.nn_modules.qlinear.qlinear_marlin import QuantLinear as MarlinQuantLinear
 from auto_gptq.nn_modules.qlinear.qlinear_tritonv2 import QuantLinear as TritonV2QuantLinear
 from auto_gptq.utils.import_utils import dynamically_import_QuantLinear
-import habana_frameworks.torch.core as htcore
 
 
 try:
@@ -2205,6 +2204,11 @@ class TestQ4HPU(unittest.TestCase):
         ]
     )
     def test_generation(self, in_device, model_dtype):
+        try:
+            import habana_frameworks.torch.core as htcore
+        except Exception as e:
+            self.skipTest("Couldn't import HPU plugin, skipping HPU tests")
+
         # Reference generated with the cuda-old kernel and TheBloke/Llama-2-7B-Chat-GPTQ
         reference_output = "<s> I am in Paris and I am feeling very sad and lonely. everybody I know is busy and I don't have any friends here. I am staying in a small apartment in the 11th arrondissement and I am feeling very isolated. I miss my friends and family back home and I don'"
 
@@ -2270,6 +2274,11 @@ def test_generation(self, in_device, model_dtype):
         ]
     )
     def test_bias(self, in_device, model_dtype):
+        try:
+            import habana_frameworks.torch.core as htcore
+        except Exception as e:
+            self.skipTest("Couldn't import HPU plugin, skipping HPU tests")
+
         device = torch.device(in_device)
         # TheBloke/Llama-2-7B-Chat-GPTQ has bias, but they are all zeros, use a checkpoint which really uses bias.
         model_id = "s3nh/starcoderbase-1b-GPTQ"
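
The per-test variant used in tests/test_q4.py, sketched as a standalone unittest case with illustrative names:

import unittest


class OptionalHPUTest(unittest.TestCase):
    def test_runs_only_with_hpu(self):
        try:
            import habana_frameworks.torch.core as htcore  # noqa: F401
        except Exception:
            self.skipTest("Couldn't import HPU plugin, skipping HPU tests")
        # The body below executes only when the HPU stack imported cleanly.
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()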
