diff --git a/python/tvm/tir/tensor_intrin/arm_cpu.py b/python/tvm/tir/tensor_intrin/arm_cpu.py index fa28cd80c682..054b678b44e2 100644 --- a/python/tvm/tir/tensor_intrin/arm_cpu.py +++ b/python/tvm/tir/tensor_intrin/arm_cpu.py @@ -20,7 +20,9 @@ @T.prim_func def dot_product_4x4_i8i8i32_desc( - A: T.Buffer[(4,), "int8"], B: T.Buffer[(4, 4), "int8"], C: T.Buffer[(4,), "int32"] + A: T.Buffer((4,), "int8", offset_factor=1), + B: T.Buffer((4, 4), "int8", offset_factor=1), + C: T.Buffer((4,), "int32", offset_factor=1), ) -> None: with T.block("root"): T.reads(C[0:4], A[0:4], B[0:4, 0:4]) @@ -36,7 +38,9 @@ def dot_product_4x4_i8i8i32_desc( @T.prim_func def dot_product_4x4_i8i8i32_neon( - A: T.Buffer[(4,), "int8"], B: T.Buffer[(4, 4), "int8"], C: T.Buffer[(4,), "int32"] + A: T.Buffer((4,), "int8", offset_factor=1), + B: T.Buffer((4, 4), "int8", offset_factor=1), + C: T.Buffer((4,), "int32", offset_factor=1), ) -> None: with T.block("root"): T.reads(C[0:4], A[0:4], B[0:4, 0:4]) @@ -92,7 +96,9 @@ def dot_product_4x4_i8i8i32_neon( @T.prim_func def dot_product_4x4_i8i8i32_sdot( - A: T.Buffer[(4,), "int8"], B: T.Buffer[(4, 4), "int8"], C: T.Buffer[(4,), "int32"] + A: T.Buffer((4,), "int8", offset_factor=1), + B: T.Buffer((4, 4), "int8", offset_factor=1), + C: T.Buffer((4,), "int32", offset_factor=1), ) -> None: with T.block("root"): T.reads(C[0:4], A[0:4], B[0:4, 0:4]) diff --git a/python/tvm/tir/tensor_intrin/x86.py b/python/tvm/tir/tensor_intrin/x86.py index 1d6accd9191b..c0c551071c80 100644 --- a/python/tvm/tir/tensor_intrin/x86.py +++ b/python/tvm/tir/tensor_intrin/x86.py @@ -24,7 +24,9 @@ @T.prim_func def dot_product_16x4_u8i8i32_desc( - A: T.Buffer[(4,), "uint8"], B: T.Buffer[(16, 4), "int8"], C: T.Buffer[(16,), "int32"] + A: T.Buffer((4,), "uint8", offset_factor=1), + B: T.Buffer((16, 4), "int8", offset_factor=1), + C: T.Buffer((16,), "int32", offset_factor=1), ) -> None: with T.block("root"): T.reads(C[0:16], A[0:4], B[0:16, 0:4]) @@ -40,7 +42,9 @@ def dot_product_16x4_u8i8i32_desc( @T.prim_func def dot_product_16x4_u8i8i32_vnni( - A: T.Buffer[(4,), "uint8"], B: T.Buffer[(16, 4), "int8"], C: T.Buffer[(16,), "int32"] + A: T.Buffer((4,), "uint8", offset_factor=1), + B: T.Buffer((16, 4), "int8", offset_factor=1), + C: T.Buffer((16,), "int32", offset_factor=1), ) -> None: with T.block("root"): T.reads(C[0:16], A[0:4], B[0:16, 0:4])