Skip to content

Commit

Permalink
rocm sdot4 works
Browse files (browse the repository at this point in the history)
  • Loading branch information
masahi committed Apr 13, 2022
1 parent 6cc6280 commit f4562b9
Showing 1 changed file with 5 additions and 1 deletion.
6 changes: 5 additions & 1 deletion python/tvm/topi/cuda/tensor_intrin.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,11 @@ def _instr(index):
vec_y = yy.vload(0, dtype=vec_y_dtype)
prev_z = 0 if index == 0 else zz.vload(0)

- new_z = tvm.tir.call_pure_extern(zz_dtype, "__dp4a", vec_x, vec_y, prev_z)
+ # new_z = tvm.tir.call_pure_extern(zz_dtype, "__dp4a", vec_x, vec_y, prev_z)
+ new_z = tvm.tir.call_llvm_pure_intrin(zz_dtype, "llvm.amdgcn.sdot4", tvm.tir.const(4, "uint32"),
+ tvm.tir.call_intrin("int32", "tir.reinterpret", vec_x),
+ tvm.tir.call_intrin("int32", "tir.reinterpret", vec_y),
+ prev_z, True)
ib.emit(zz.vstore(0, new_z))

return ib.get()
Expand Down

0 comments on commit f4562b9

Please sign in to comment.