Skip to content

Commit

Permalink
dp4a works
Browse files Browse the repository at this point in the history
  • Loading branch information
masahi committed Apr 12, 2022
1 parent 4915c6a commit eb0cae2
Showing 1 changed file with 3 additions and 4 deletions.
7 changes: 3 additions & 4 deletions python/tvm/tir/tensor_intrin/dot_product_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,10 +45,9 @@ def dp4a_impl(
T.reads(C[0], A[0:4], B[0:4])
T.writes(C[0])

- A_i8x4 = B.vload([0], "int8x4")
- B_i8x4 = B.vload([0], "int8x4")
-
- C[0] = T.call_pure_extern("__dp4a", A_i8x4, B_i8x4, C[0], dtype="int32")
+ C[0] += T.call_pure_extern(
+ "__dp4a", A.vload([0], "int8x4"), B.vload([0], "int8x4"), T.int32(0), dtype="int32"
+ )


DP4A_INTRIN = "dp4a"
Expand Down

0 comments on commit eb0cae2

Please sign in to comment.