Merge pull request #35 from kaixih:use_fast_accumulation_fp8

PiperOrigin-RevId: 582770579
google · Nov 15, 2023 · e7c8561
2 parents 5420b56 + 2cefb21
commit e7c8561
Showing 1 changed file with 3 additions and 1 deletion.
diff --git a/praxis/layers/injection/fp8_nvidia_gpu.py b/praxis/layers/injection/fp8_nvidia_gpu.py
@@ -103,7 +103,9 @@ def __call__(self, equation: str, *args: pytypes.JTensor) -> pytypes.JTensor:
     k_qdq = fp8_ops.in_qdq(
         comp_dtype, k, theta.kernel_scale, theta.kernel_amax_history
     )
-    y_qdq = jnp.einsum(equation, x_qdq, k_qdq)
+    y_qdq = jnp.einsum(
+        equation, x_qdq, k_qdq, _dot_general=fp8_ops.dot_general_with_precision
+    )
     y = fp8_ops.out_qdq(
         comp_dtype,
         y_qdq,