
add tolerance
wejoncy committed Sep 25, 2024
1 parent c632221 commit bcd551f
Showing 1 changed file with 26 additions and 0 deletions.
onnxruntime/test/providers/cpu/math/gemm_test.cc
@@ -64,6 +64,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
test.AddInput<MLFloat16>("B", {4, 3}, f_B);
test.AddInput<MLFloat16>("C", {2, 3}, f_C);
test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
test.SetOutputTolerance(0.005f);
test.ConfigExcludeEps({kTensorrtExecutionProvider}) // TensorRT: fp16 is not supported
.Config(run_with_tunable_op)
.RunWithConfig();
@@ -87,6 +88,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
test.AddInput<MLFloat16>("B", {4, 3}, f_B, true);
test.AddInput<MLFloat16>("C", {3}, f_C, true);
test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
test.SetOutputTolerance(0.005f);
test.ConfigExcludeEps({kTensorrtExecutionProvider}) // TensorRT: fp16 is not supported
.Config(run_with_tunable_op)
.RunWithConfig();
@@ -109,10 +111,33 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
test.AddInput<MLFloat16>("B", {4, 3}, f_B, true);
test.AddInput<MLFloat16>("C", {1}, f_C, true);
test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
test.SetOutputTolerance(0.005f);
test.ConfigExcludeEps({kTensorrtExecutionProvider}) // TensorRT: fp16 is not supported
.Config(run_with_tunable_op)
.RunWithConfig();
}
}


// Only CUDA, ROCM and CoreML kernels have float 16 support
TEST(GemmOpTest, GemmTransB_f16) {
#ifdef USE_CUDA
int min_cuda_architecture = 530;
if (!HasCudaEnvironment(min_cuda_architecture)) {
LOGS_DEFAULT(WARNING) << "Hardware NOT support FP16";
return;
}
#endif

std::vector<float> A{1.0f, 2.0f, 3.0f, 4.0f,
-1.0f, -2.0f, -3.0f, -4.0f};
std::vector<float> B = {0.5f, 2.1f, 1.2f, -0.3f, -1.2f, 0.2f, 1.0f, -2.1f, 1.3f, 4.1f, 1.3f, -8.1f};
std::vector<float> C = {0.5f, 2.1f, 1.2f, -0.3f, -1.2f, 0.2f};

std::vector<MLFloat16> f_A(8);
std::vector<MLFloat16> f_B(12);
ConvertFloatToMLFloat16(A.data(), f_A.data(), 8);
ConvertFloatToMLFloat16(B.data(), f_B.data(), 12);
{
// bias is a scalar and transB is True
std::vector<MLFloat16> f_Y(6);
@@ -131,6 +156,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
test.AddInput<MLFloat16>("B", {3, 4}, f_B, true);
test.AddInput<MLFloat16>("C", {1}, f_C, true);
test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
test.SetOutputTolerance(0.005f);
test.ConfigExcludeEps({kTensorrtExecutionProvider}) // TensorRT: fp16 is not supported
.Config(run_with_tunable_op)
.RunWithConfig();
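
For context, the new calls follow the existing OpTester pattern in this file: fp16 kernels on different execution providers accumulate rounding error differently, so the expected output is compared with a small absolute tolerance instead of exact equality. A minimal sketch of that pattern is below; the OpTester constructor and opset version are assumptions not shown in this diff, while the other calls mirror the ones added above.

// Sketch: with B = identity and no C input, Y should equal A within fp16 tolerance.
std::vector<float> A{1.0f, 2.0f, 3.0f, 4.0f};
std::vector<float> B{1.0f, 0.0f, 0.0f, 1.0f};
std::vector<MLFloat16> f_A(4), f_B(4), f_Y(4);
ConvertFloatToMLFloat16(A.data(), f_A.data(), 4);
ConvertFloatToMLFloat16(B.data(), f_B.data(), 4);
ConvertFloatToMLFloat16(A.data(), f_Y.data(), 4);  // expected output equals A

OpTester test("Gemm", 13);  // constructor/opset are assumptions, not part of this diff
test.AddInput<MLFloat16>("A", {2, 2}, f_A);
test.AddInput<MLFloat16>("B", {2, 2}, f_B);
test.AddOutput<MLFloat16>("Y", {2, 2}, f_Y);
// Allow a small absolute deviation to absorb fp16 rounding differences
// across CUDA, ROCm, and CoreML kernels.
test.SetOutputTolerance(0.005f);
test.ConfigExcludeEps({kTensorrtExecutionProvider})  // TensorRT: fp16 is not supported
    .Config(run_with_tunable_op)
    .RunWithConfig();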
