
Commit d50294c
add missing Q5_0 test
Isotr0py committed Sep 14, 2024
1 parent 3f03934 commit d50294c
Showing 1 changed file with 19 additions and 7 deletions.
tests/kernels/test_gguf.py (26 changes: 19 additions & 7 deletions)
@@ -27,6 +27,7 @@ def get_gguf_sample_tensors(
 NUM_TOKENS = [7, 83, 128, 2048]  # Arbitrary values for testing
 SEEDS = [0]
 QUANT_TYPES = [
+    # i-matrix
     GGMLQuantizationType.IQ1_M,
     GGMLQuantizationType.IQ1_S,
     GGMLQuantizationType.IQ2_S,
@@ -35,12 +36,15 @@ def get_gguf_sample_tensors(
     GGMLQuantizationType.IQ3_XXS,
     GGMLQuantizationType.IQ4_NL,
     GGMLQuantizationType.IQ4_XS,
+    # k-quants
     GGMLQuantizationType.Q2_K,
     GGMLQuantizationType.Q3_K,
-    GGMLQuantizationType.Q4_0,
     GGMLQuantizationType.Q4_K,
     GGMLQuantizationType.Q5_K,
     GGMLQuantizationType.Q6_K,
+    # standard quantization
+    GGMLQuantizationType.Q4_0,
+    GGMLQuantizationType.Q5_0,
     GGMLQuantizationType.Q8_0,
 ]

@@ -89,12 +93,20 @@ def test_mmvq(hidden_size: int, dtype: torch.dtype,
 @pytest.mark.parametrize("num_tokens", NUM_TOKENS)
 @pytest.mark.parametrize("hidden_size", HIDDEN_SIZES)
 @pytest.mark.parametrize("dtype", DTYPES)
-@pytest.mark.parametrize("quant_type", [
-    GGMLQuantizationType.Q2_K, GGMLQuantizationType.Q3_K,
-    GGMLQuantizationType.Q4_0, GGMLQuantizationType.Q4_K,
-    GGMLQuantizationType.Q5_K, GGMLQuantizationType.Q6_K,
-    GGMLQuantizationType.Q8_0
-])
+@pytest.mark.parametrize(
+    "quant_type",
+    [
+        # k-quants
+        GGMLQuantizationType.Q2_K,
+        GGMLQuantizationType.Q3_K,
+        GGMLQuantizationType.Q4_K,
+        GGMLQuantizationType.Q5_K,
+        GGMLQuantizationType.Q6_K,
+        # standard quants
+        GGMLQuantizationType.Q4_0,
+        GGMLQuantizationType.Q5_0,
+        GGMLQuantizationType.Q8_0,
+    ])
 @torch.inference_mode()
 def test_mmq(num_tokens: int, hidden_size: int, dtype: torch.dtype,
              quant_type: GGMLQuantizationType):
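For context, the stacked @pytest.mark.parametrize decorators expand into the cross product of their argument lists, so appending GGMLQuantizationType.Q5_0 to the quant_type list is all that is needed to run test_mmq for Q5_0 across every num_tokens, hidden_size, and dtype combination. A minimal sketch of that expansion follows; the test body, names, and shortened lists are illustrative only, not the repository's actual test_mmq implementation, and assume pytest and the gguf package are installed.

# Illustrative sketch: how stacked parametrize decorators fan out into one
# test case per (num_tokens, quant_type) pair, including the new Q5_0 entry.
import pytest
from gguf import GGMLQuantizationType

NUM_TOKENS = [7, 83, 128, 2048]
QUANT_TYPES = [
    GGMLQuantizationType.Q4_0,
    GGMLQuantizationType.Q5_0,  # coverage added by this commit
    GGMLQuantizationType.Q8_0,
]


@pytest.mark.parametrize("num_tokens", NUM_TOKENS)
@pytest.mark.parametrize("quant_type", QUANT_TYPES)
def test_parametrize_expansion(num_tokens: int,
                               quant_type: GGMLQuantizationType):
    # pytest generates 4 x 3 = 12 cases from the two lists above; 4 of them
    # exercise Q5_0.
    assert num_tokens > 0
    assert isinstance(quant_type, GGMLQuantizationType)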
