This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit: adapt python code
Signed-off-by: changwangss <chang1.wang@intel.com>
changwangss committed May 9, 2024
1 parent a0cac9f commit bf511a3
Showing 4 changed files with 17 additions and 22 deletions.
@@ -213,9 +213,8 @@ def set_weights_bias(
     else:
         g_idx = torch.empty(0, dtype=torch.int32)
     if q_config.bits == 4:
-        int_weight = (int_weight - 8) * 16
-        gptq_scales = gptq_scales / 16
-        gptq_zeros = (gptq_zeros - 8) * 16
+        int_weight = int_weight - 8
+        gptq_zeros = gptq_zeros - 8

     if q_config.sym:
         gptq_zeros = torch.empty(0, dtype=torch.int8)
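For reference, a minimal standalone sketch of the new 4-bit handling in the hunk above: unsigned 4-bit GPTQ weights and zero points are shifted into the signed range by subtracting 8, and the old int4_fullrange rescaling (the * 16 on weights/zeros and the / 16 on scales) is gone. The function name and dummy tensors below are illustrative only and are not part of this commit.

```python
# Minimal sketch (not the repo's set_weights_bias): only the arithmetic from
# the diff above is shown; names and shapes are made up for the example.
import torch

def adjust_int4_gptq(int_weight: torch.Tensor,
                     gptq_zeros: torch.Tensor,
                     sym: bool):
    # Shift unsigned 4-bit values [0, 15] into the signed range [-8, 7];
    # the removed int4_fullrange path additionally multiplied by 16.
    int_weight = int_weight - 8
    gptq_zeros = gptq_zeros - 8
    if sym:
        # Symmetric quantization carries no zero points.
        gptq_zeros = torch.empty(0, dtype=torch.int8)
    return int_weight, gptq_zeros

w = torch.randint(0, 16, (4, 8), dtype=torch.int32)
z = torch.randint(0, 16, (4, 1), dtype=torch.int32)
shifted_w, shifted_z = adjust_int4_gptq(w, z, sym=False)
print(shifted_w.min() >= -8, shifted_w.max() <= 7)
```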
@@ -344,13 +343,12 @@ def recover_int_weight(g_idx, int_weight):
         if scales_dtype is None:
             assert False, "scales dtype only support fp32."
         scales = qbits.acquire_packed_weight_info(self.weight, 9)
-        if bits == 4:
-            scales = scales * 16

         zp = qbits.acquire_packed_weight_info(self.weight, 11)[0] != 0
         if zp:
             qzeros = qbits.acquire_packed_weight_info(self.weight, 10)
             if bits == 4:
-                qzeros = qzeros // 16 + 8
+                qzeros = qzeros + 8
             else:
                 qzeros = (qzeros.to(torch.int32) + 128).to(torch.uint8)
         else:
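A standalone sketch of the zero-point recovery arithmetic shown above, with the qbits.acquire_packed_weight_info() calls replaced by dummy tensors. It only illustrates the +8 shift for 4-bit zeros (the // 16 is dropped along with int4_fullrange) and the int8-to-uint8 mapping for 8-bit zeros; it is not the repository's method.

```python
# Standalone sketch of the qzeros recovery; dummy tensors stand in for the
# packed-weight queries, so only the arithmetic is illustrated.
import torch

def recover_qzeros(qzeros: torch.Tensor, bits: int) -> torch.Tensor:
    if bits == 4:
        # Stored 4-bit zero points are signed (shifted by -8 at pack time);
        # add 8 to return to the unsigned [0, 15] range.
        return qzeros + 8
    # 8-bit zero points are stored as int8 and mapped back to uint8.
    return (qzeros.to(torch.int32) + 128).to(torch.uint8)

print(recover_qzeros(torch.tensor([-8, 0, 7], dtype=torch.int8), bits=4))
print(recover_qzeros(torch.tensor([-128, 0, 127], dtype=torch.int8), bits=8))
```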
@@ -281,7 +281,6 @@ def post_init_cpu(self):
             )

         if self.bits == 4 and self.weight_dtype not in [
-            "int4_fullrange",
             "int4_clip",
             "nf4",
             "fp4_e2m1_bnb",
@@ -300,7 +299,6 @@ def post_init_cpu(self):

         elif self.weight_dtype not in [
             "int8",
-            "int4_fullrange",
             "int4_clip",
             "nf4",
             "fp4_e2m1_bnb",
@@ -310,7 +308,7 @@ def post_init_cpu(self):
         ]:
             raise ValueError(
                 f"weight_dtype must be a string in "
-                f"'int8', 'int4_fullrange', 'int4_clip', 'nf4', 'fp4_e2m1_bnb', 'fp4_e2m1', 'fp8_e5m2, fp8_e4m3'"
+                f"'int8', 'int4_clip', 'nf4', 'fp4_e2m1_bnb', 'fp4_e2m1', 'fp8_e5m2, fp8_e4m3'"
             )

         if self.scale_dtype is not None and self.scale_dtype not in [
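To see the effect of dropping "int4_fullrange" from the allowed dtypes, a rough usage sketch follows. The import path and the assumption that post_init_cpu() performs the validation shown above are taken from the tests below and are not verified against the full library.

```python
# Hypothetical check of the tightened weight_dtype validation; import path
# and validation entry point are assumptions based on the tests in this commit.
from intel_extension_for_transformers.transformers import RtnConfig

ok = RtnConfig(bits=4, weight_dtype="int4_clip", group_size=32)
ok.post_init_cpu()  # passes: int4_clip is still in the supported list

try:
    bad = RtnConfig(bits=4, weight_dtype="int4_fullrange", group_size=32)
    bad.post_init_cpu()
except ValueError as e:
    # int4_fullrange was removed from the supported dtypes in this commit.
    print(e)
```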
15 changes: 7 additions & 8 deletions tests/CI/test_quantization.py
@@ -408,18 +408,17 @@ def test_quantization_for_llm(self):

         # weight-only
         # RTN
-        woq_config = RtnConfig(bits=4, weight_dtype="int4_fullrange")
+        woq_config = RtnConfig(bits=4)
         woq_model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                                          quantization_config=woq_config,
                                                          use_neural_speed=False
                                                          )
         woq_model.eval()
         output = woq_model(dummy_input)
-        self.assertTrue(isclose(float(output[0][0][0][0]), 0.16387596726417542, rel_tol=1e-04))
+        self.assertTrue(isclose(float(output[0][0][0][0]), 0.17631684243679047, rel_tol=1e-04))

         # AWQ
         woq_config = AwqConfig(bits=4,
-                               weight_dtype="int4_fullrange",
                                zero_point=False,
                                calib_iters=5,
                                tokenizer=tokenizer
@@ -431,13 +430,13 @@ def test_quantization_for_llm(self):
                                                          )
         woq_model.eval()
         output = woq_model(dummy_input)
-        self.assertTrue(isclose(float(output[0][0][0][0]), 0.17998121678829193, rel_tol=1e-04))
+        self.assertTrue(isclose(float(output[0][0][0][0]), 0.18019595742225647, rel_tol=1e-04))

         # TEQ
-        woq_config = TeqConfig(bits=4, weight_dtype="int4_fullrange",
-                               calib_iters=5,
-                               tokenizer=tokenizer,
-                               )
+        woq_config = TeqConfig(bits=4,
+                               calib_iters=5,
+                               tokenizer=tokenizer,
+                               )
         woq_model = AutoModelForCausalLM.from_pretrained(model_name_or_path,
                                                          quantization_config=woq_config,
                                                          use_neural_speed=False
10 changes: 5 additions & 5 deletions tests/CI/test_weight_only.py
@@ -86,9 +86,9 @@ def tearDownClass(cls) -> None:

     def test_woq_config(self):
         config = RtnConfig(
-            bits=4, weight_dtype="int4_fullrange", group_size=32)
+            bits=4, weight_dtype="int4_clip", group_size=32)
         diff_res = config.to_diff_dict()
-        ref_config = {'weight_dtype': 'int4_fullrange'}
+        ref_config = {'weight_dtype': 'int4_clip'}
         self.assertEqual(diff_res, ref_config)
         print(diff_res)
         print(config.to_dict())
@@ -133,10 +133,10 @@ def test_int8(self):
     def test_int4(self):
         raw_wei = torch.rand(2, 32, dtype=torch.float)
         compress_wei = qbits.quantize_to_packed_weight(
-            raw_wei, True, 32, "fp32", "int4_fullrange", "fp32", False)
+            raw_wei, True, 32, "fp32", "nf4", "fp32", False)
         revert_wei = torch.zeros(2, 32, dtype=torch.float)
         qbits.dequantize_packed_weight(compress_wei, revert_wei, True,
-                                       "fp32", "int4_fullrange", "fp32")
+                                       "fp32", "nf4", "fp32")
         for bias in [True, False]:
             model = M(with_bias=bias)
             with torch.no_grad():
@@ -146,7 +146,7 @@ def test_int4(self):
             with torch.no_grad():
                 model.linear.weight = torch.nn.Parameter(raw_wei)
             config = RtnConfig(
-                bits=4, weight_dtype="int4_fullrange", group_size=32)
+                bits=4, weight_dtype="nf4", group_size=32)
             config.post_init_cpu()
             convert_to_quantized_model(model, config)
             output_quant = model(activation)
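A rough round-trip check for the nf4 path exercised in test_int4 above. The qbits argument order is copied from the test; the import path is an assumption, and the final comparison is only an illustration, not a check the repository performs.

```python
# Sketch of the nf4 quantize/dequantize round trip; the qbits import path is
# assumed to match the test file's usage and may differ in practice.
import torch
from intel_extension_for_transformers import qbits  # import path assumed

raw_wei = torch.rand(2, 32, dtype=torch.float)
compress_wei = qbits.quantize_to_packed_weight(
    raw_wei, True, 32, "fp32", "nf4", "fp32", False)
revert_wei = torch.zeros(2, 32, dtype=torch.float)
qbits.dequantize_packed_weight(compress_wei, revert_wei, True,
                               "fp32", "nf4", "fp32")
# nf4 is lossy, so the error is small but nonzero.
print(torch.max(torch.abs(raw_wei - revert_wei)))
```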
