Skip to content

Commit

Permalink
Minor improvements
Browse files: browse the repository at this point in the history
  • Loading branch information
Xia-Weiwen committed May 10, 2024
1 parent 09cc153 commit 177bd39
Show file tree
Hide file tree
Showing 2 changed files with 5 additions and 2 deletions.
1 change: 1 addition & 0 deletions bitsandbytes/backends/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def quantize_4bit(
quant_storage=torch.uint8,
) -> Tuple[torch.Tensor, QuantState]:
assert_on_cpu([A, absmax, out])
assert quant_storage == torch.uint8, "CPU backend only supports uint8 quant_storage"
return quantize_4bit_impl(A, absmax, out, blocksize, compress_statistics, quant_type)

def dequantize_4bit(
Expand Down
6 changes: 4 additions & 2 deletions bitsandbytes/backends/cpu_xpu_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,8 @@ def quantize_4bit_impl(
)

if ipex_cpu and _ipex_cpu_version_prereq(2, 2) and input_shape[0] % blocksize == 0:
# lowp_mode: lowest precision for computation
lowp_mode = ipex_cpu.quantization.WoqLowpMode.BF16
state.op_context = torch.ops.ipex_prepack.weight_only_qlinear_prepack(
out.reshape([input_shape[0], input_shape[1] // 2]),
ipex_cpu.quantization.WoqWeightDtype.NF4,
Expand All @@ -353,8 +355,8 @@ def quantize_4bit_impl(
None, # g_idx
None, # batch_size
blocksize,
int(ipex_cpu.quantization.WoqLowpMode.BF16),
-1, # act_quant_mode
int(lowp_mode),
-1, # act_quant_mode. -1 means don't quant activation
)

return out, state
Expand Down

0 comments on commit 177bd39

Please sign in to comment.