From bcd551f98f907dc6412e6cb2474b39f0ca77c26d Mon Sep 17 00:00:00 2001
From: wejoncy
Date: Wed, 25 Sep 2024 02:27:08 -0700
Subject: [PATCH] add tolerance

---
 .../test/providers/cpu/math/gemm_test.cc | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/onnxruntime/test/providers/cpu/math/gemm_test.cc b/onnxruntime/test/providers/cpu/math/gemm_test.cc
index 4d44f2313a533..3123367f1e2bc 100644
--- a/onnxruntime/test/providers/cpu/math/gemm_test.cc
+++ b/onnxruntime/test/providers/cpu/math/gemm_test.cc
@@ -64,6 +64,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
     test.AddInput<MLFloat16>("B", {4, 3}, f_B);
     test.AddInput<MLFloat16>("C", {2, 3}, f_C);
     test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
+    test.SetOutputTolerance(0.005f);
     test.ConfigExcludeEps({kTensorrtExecutionProvider})  // TensorRT: fp16 is not supported
         .Config(run_with_tunable_op)
         .RunWithConfig();
@@ -87,6 +88,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
     test.AddInput<MLFloat16>("B", {4, 3}, f_B, true);
     test.AddInput<MLFloat16>("C", {3}, f_C, true);
     test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
+    test.SetOutputTolerance(0.005f);
     test.ConfigExcludeEps({kTensorrtExecutionProvider})  // TensorRT: fp16 is not supported
         .Config(run_with_tunable_op)
         .RunWithConfig();
@@ -109,10 +111,33 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
     test.AddInput<MLFloat16>("B", {4, 3}, f_B, true);
     test.AddInput<MLFloat16>("C", {1}, f_C, true);
     test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
+    test.SetOutputTolerance(0.005f);
     test.ConfigExcludeEps({kTensorrtExecutionProvider})  // TensorRT: fp16 is not supported
         .Config(run_with_tunable_op)
         .RunWithConfig();
   }
+}
+
+
+// Only CUDA, ROCM and CoreML kernels have float 16 support
+TEST(GemmOpTest, GemmTransB_f16) {
+#ifdef USE_CUDA
+  int min_cuda_architecture = 530;
+  if (!HasCudaEnvironment(min_cuda_architecture)) {
+    LOGS_DEFAULT(WARNING) << "Hardware NOT support FP16";
+    return;
+  }
+#endif
+
+  std::vector<float> A{1.0f, 2.0f, 3.0f, 4.0f,
+                       -1.0f, -2.0f, -3.0f, -4.0f};
+  std::vector<float> B = {0.5f, 2.1f, 1.2f, -0.3f, -1.2f, 0.2f, 1.0f, -2.1f, 1.3f, 4.1f, 1.3f, -8.1f};
+  std::vector<float> C = {0.5f, 2.1f, 1.2f, -0.3f, -1.2f, 0.2f};
+
+  std::vector<MLFloat16> f_A(8);
+  std::vector<MLFloat16> f_B(12);
+  ConvertFloatToMLFloat16(A.data(), f_A.data(), 8);
+  ConvertFloatToMLFloat16(B.data(), f_B.data(), 12);
   {
     // bias is a scalar and transB is True
     std::vector<MLFloat16> f_Y(6);
@@ -131,6 +156,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
     test.AddInput<MLFloat16>("B", {3, 4}, f_B, true);
     test.AddInput<MLFloat16>("C", {1}, f_C, true);
     test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
+    test.SetOutputTolerance(0.005f);
     test.ConfigExcludeEps({kTensorrtExecutionProvider})  // TensorRT: fp16 is not supported
         .Config(run_with_tunable_op)
         .RunWithConfig();
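
Note (not part of the patch above): a minimal standalone sketch of why these fp16 tests use an
absolute tolerance instead of exact equality. The expected outputs are fp32 reference values
rounded through ConvertFloatToMLFloat16, and half-precision arithmetic introduces rounding of a
similar size. The sketch assumes a compiler with the _Float16 extension (recent GCC/Clang); the
exact accumulation behaviour of a real GEMM kernel is an assumption made here for illustration,
not something stated by the patch.

#include <cstdio>

int main() {
  // Sample values taken from the test inputs above.
  const float a[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  const float b[4] = {0.5f, 2.1f, 1.2f, -0.3f};

  // Reference dot product in fp32.
  float ref = 0.0f;
  for (int i = 0; i < 4; ++i) ref += a[i] * b[i];

  // Same dot product with operands rounded to fp16 and accumulated in fp16,
  // mimicking (as an assumption) a kernel that works in half precision.
  _Float16 acc = 0;
  for (int i = 0; i < 4; ++i) acc += static_cast<_Float16>(a[i]) * static_cast<_Float16>(b[i]);

  // The difference is small but nonzero, which is what SetOutputTolerance(0.005f) absorbs.
  std::printf("fp32 = %f, fp16 = %f, diff = %f\n",
              ref, static_cast<float>(acc), ref - static_cast<float>(acc));
  return 0;
}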