From bcd551f98f907dc6412e6cb2474b39f0ca77c26d Mon Sep 17 00:00:00 2001
From: wejoncy
Date: Wed, 25 Sep 2024 02:27:08 -0700
Subject: [PATCH] add tolerance

---
 .../test/providers/cpu/math/gemm_test.cc | 26 +++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/onnxruntime/test/providers/cpu/math/gemm_test.cc b/onnxruntime/test/providers/cpu/math/gemm_test.cc
index 4d44f2313a533..3123367f1e2bc 100644
--- a/onnxruntime/test/providers/cpu/math/gemm_test.cc
+++ b/onnxruntime/test/providers/cpu/math/gemm_test.cc
@@ -64,6 +64,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
     test.AddInput<MLFloat16>("B", {4, 3}, f_B);
     test.AddInput<MLFloat16>("C", {2, 3}, f_C);
     test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
+    test.SetOutputTolerance(0.005f);
     test.ConfigExcludeEps({kTensorrtExecutionProvider})  // TensorRT: fp16 is not supported
         .Config(run_with_tunable_op)
         .RunWithConfig();
@@ -87,6 +88,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
     test.AddInput<MLFloat16>("B", {4, 3}, f_B, true);
     test.AddInput<MLFloat16>("C", {3}, f_C, true);
     test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
+    test.SetOutputTolerance(0.005f);
     test.ConfigExcludeEps({kTensorrtExecutionProvider})  // TensorRT: fp16 is not supported
         .Config(run_with_tunable_op)
         .RunWithConfig();
@@ -109,10 +111,33 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
     test.AddInput<MLFloat16>("B", {4, 3}, f_B, true);
     test.AddInput<MLFloat16>("C", {1}, f_C, true);
     test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
+    test.SetOutputTolerance(0.005f);
     test.ConfigExcludeEps({kTensorrtExecutionProvider})  // TensorRT: fp16 is not supported
         .Config(run_with_tunable_op)
         .RunWithConfig();
   }
+}
+
+
+// Only CUDA, ROCM and CoreML kernels have float 16 support
+TEST(GemmOpTest, GemmTransB_f16) {
+#ifdef USE_CUDA
+  int min_cuda_architecture = 530;
+  if (!HasCudaEnvironment(min_cuda_architecture)) {
+    LOGS_DEFAULT(WARNING) << "Hardware NOT support FP16";
+    return;
+  }
+#endif
+
+  std::vector<float> A{1.0f, 2.0f, 3.0f, 4.0f,
+                       -1.0f, -2.0f, -3.0f, -4.0f};
+  std::vector<float> B = {0.5f, 2.1f, 1.2f, -0.3f, -1.2f, 0.2f, 1.0f, -2.1f, 1.3f, 4.1f, 1.3f, -8.1f};
+  std::vector<float> C = {0.5f, 2.1f, 1.2f, -0.3f, -1.2f, 0.2f};
+
+  std::vector<MLFloat16> f_A(8);
+  std::vector<MLFloat16> f_B(12);
+  ConvertFloatToMLFloat16(A.data(), f_A.data(), 8);
+  ConvertFloatToMLFloat16(B.data(), f_B.data(), 12);
   {
     // bias is a scalar and transB is True
     std::vector<MLFloat16> f_Y(6);
@@ -131,6 +156,7 @@ TEST(GemmOpTest, GemmNoTrans_f16) {
     test.AddInput<MLFloat16>("B", {3, 4}, f_B, true);
     test.AddInput<MLFloat16>("C", {1}, f_C, true);
     test.AddOutput<MLFloat16>("Y", {2, 3}, f_Y);
+    test.SetOutputTolerance(0.005f);
     test.ConfigExcludeEps({kTensorrtExecutionProvider})  // TensorRT: fp16 is not supported
         .Config(run_with_tunable_op)
         .RunWithConfig();
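
Note (not part of the patch above): a minimal standalone sketch of why these fp16 tests use an
absolute tolerance instead of exact equality. The expected outputs are fp32 reference values
rounded through ConvertFloatToMLFloat16, and half-precision arithmetic introduces rounding of a
similar size. The sketch assumes a compiler with the _Float16 extension (recent GCC/Clang); the
exact accumulation behaviour of a real GEMM kernel is an assumption made here for illustration,
not something stated by the patch.

#include <cstdio>

int main() {
  // Sample values taken from the test inputs above.
  const float a[4] = {1.0f, 2.0f, 3.0f, 4.0f};
  const float b[4] = {0.5f, 2.1f, 1.2f, -0.3f};

  // Reference dot product in fp32.
  float ref = 0.0f;
  for (int i = 0; i < 4; ++i) ref += a[i] * b[i];

  // Same dot product with operands rounded to fp16 and accumulated in fp16,
  // mimicking (as an assumption) a kernel that works in half precision.
  _Float16 acc = 0;
  for (int i = 0; i < 4; ++i) acc += static_cast<_Float16>(a[i]) * static_cast<_Float16>(b[i]);

  // The difference is small but nonzero, which is what SetOutputTolerance(0.005f) absorbs.
  std::printf("fp32 = %f, fp16 = %f, diff = %f\n",
              ref, static_cast<float>(acc), ref - static_cast<float>(acc));
  return 0;
}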