diff --git a/bitblas/gpu/intrin/lop3.py b/bitblas/gpu/intrin/lop3.py index 184d74211..a6a7011a0 100644 --- a/bitblas/gpu/intrin/lop3.py +++ b/bitblas/gpu/intrin/lop3.py @@ -1495,6 +1495,7 @@ def fast_decode_impl( (1, "int8", "float16", 8, "local", "uint", True, True, "rescale"), (4, "int8", "int8", 8, "local", "uint", False, False, "original"), (4, "int8", "int8", 16, "local", "uint", False, False, "original"), + (4, "int8", "int8", 16, "local", "int", False, False, "original"), (2, "int8", "int8", 16, "local", "uint", False, False, "original"), (2, "int8", "int8", 16, "local", "int", False, False, "original"), (1, "int8", "int8", 16, "local", "uint", False, False, "original"), @@ -1523,6 +1524,7 @@ def fast_decode_impl( (1, "int8", "float16", 8, "warp", "uint", True, True, "rescale"), (4, "int8", "int8", 8, "warp", "uint", False, False, "original"), (4, "int8", "int8", 16, "warp", "uint", False, False, "original"), + (4, "int8", "int8", 16, "warp", "int", False, False, "original"), (2, "int8", "int8", 16, "warp", "uint", False, False, "original"), (2, "int8", "int8", 16, "warp", "int", False, False, "original"), (1, "int8", "int8", 16, "warp", "uint", False, False, "original"),