From 7bef289e28b06d169367b9e2e907b97733f28558 Mon Sep 17 00:00:00 2001 From: bibek <108366729+bghimireamd@users.noreply.github.com> Date: Sun, 27 Oct 2024 01:48:48 -0500 Subject: [PATCH] Merge all bn ck nchw branches (#3332) --- driver/bn_driver.hpp | 5 +- .../miopen/batchnorm/problem_description.hpp | 20 ++- src/ocl/batchnormocl.cpp | 4 +- src/solver/batchnorm/backward_ck.cpp | 49 ++++--- src/solver/batchnorm/forward_inference_ck.cpp | 64 +++++---- src/solver/batchnorm/forward_training_ck.cpp | 47 ++++--- test/bn_3d_spatial_test.cpp | 36 ++--- test/bn_spatial_test.cpp | 29 ++-- test/gtest/bn.hpp | 38 +++++ test/gtest/bn_bwd.cpp | 132 +++++++++--------- test/gtest/bn_fwd_train.cpp | 116 +++++++-------- test/gtest/bn_infer.cpp | 120 ++++++++-------- 12 files changed, 359 insertions(+), 301 deletions(-) diff --git a/driver/bn_driver.hpp b/driver/bn_driver.hpp index 238b4ea1e6..c89c3f166d 100644 --- a/driver/bn_driver.hpp +++ b/driver/bn_driver.hpp @@ -60,7 +60,7 @@ #define ERRTOL_FP32 1e-4 #define ERRTOL_FP16 0.5e-3 #define RMSTOL_FP32 1e-4 -#define RMSTOL_FP16 0.5e-3 +#define RMSTOL_FP16 2e-3 #define MIO_DRIVER_BN_REFERENCE_COMPUTE_3D_AS_2D 1 // Resolves issue #1974 @@ -1298,7 +1298,8 @@ int BatchNormDriver::VerifyForward() out.CopyFromDeviceToHost(GetStream()); - maxval = static_cast(0.0); + maxval = static_cast(0.0); + auto errorOut = miopen::rms_range(out_ref.data, out.GetVector()); if(!std::isfinite(errorOut) || errorOut > maxrms) { diff --git a/src/include/miopen/batchnorm/problem_description.hpp b/src/include/miopen/batchnorm/problem_description.hpp index 8054111128..b0ecb64b7e 100644 --- a/src/include/miopen/batchnorm/problem_description.hpp +++ b/src/include/miopen/batchnorm/problem_description.hpp @@ -179,11 +179,7 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, return scaleDesc; } - const TensorDescriptor& GetScaleBiasDiffDesc() const - { - assert(direction == Direction::Backward); - return scaleDesc; - } + const TensorDescriptor& GetScaleBiasDiffDesc() const { return scaleDesc; } bool GetResultSave() const { @@ -217,6 +213,20 @@ struct MIOPEN_INTERNALS_EXPORT ProblemDescription : ProblemDescriptionBase, : ((in_layout == "NDHWC") && (out_layout == "NDHWC")); } + bool IsLayoutNCHW() const + { + if(direction == Direction::Backward) + { + return xDesc.GetLengths().size() == 4 + ? ((in_layout == "NCHW") && (out_layout == "NCHW") && (din_layout == "NCHW")) + : ((in_layout == "NCDHW") && (out_layout == "NCDHW") && + (din_layout == "NCDHW")); + } + + return xDesc.GetLengths().size() == 4 ? ((in_layout == "NCHW") && (out_layout == "NCHW")) + : ((in_layout == "NCDHW") && (out_layout == "NCDHW")); + } + bool Is2D() const { return xDesc.GetLengths().size() == 4; } bool Is3D() const { return xDesc.GetLengths().size() == 5; } diff --git a/src/ocl/batchnormocl.cpp b/src/ocl/batchnormocl.cpp index dca94078be..f5555047c1 100644 --- a/src/ocl/batchnormocl.cpp +++ b/src/ocl/batchnormocl.cpp @@ -250,8 +250,8 @@ void BatchNormForwardInference(Handle& handle, }(); const auto algo = AlgorithmName{"miopenBatchNormalizationForwardInference"}; - const auto solvers = solver::SolverContainer{}; + const auto solvers = solver::SolverContainer{}; solvers.ExecutePrimitive(handle, problem, algo, invoke_params); } diff --git a/src/solver/batchnorm/backward_ck.cpp b/src/solver/batchnorm/backward_ck.cpp index c99a67250b..198b046eff 100644 --- a/src/solver/batchnorm/backward_ck.cpp +++ b/src/solver/batchnorm/backward_ck.cpp @@ -87,7 +87,21 @@ struct CKArgsBNormBwd // prep for CK std::sort(in_strides.begin(), in_strides.end(), std::greater<>()); - std::rotate(lens.begin() + 1, lens.begin() + 2, lens.end()); + + if(problem.IsLayoutNHWC()) + { + std::rotate(lens.begin() + 1, lens.begin() + 2, lens.end()); + reduceDims = {0, 1, 2}; + } + else if(problem.IsLayoutNCHW()) + { + reduceDims = {0, 2, 3}; + } + else + { + MIOPEN_THROW(miopenStatusInternalError, + "BnCKBwd operation does not support this data layout"); + } } CKArgsBNormBwd(const CKArgsBNormBwd&) = default; @@ -133,7 +147,7 @@ struct CKArgsBNormBwd std::array arrScaleBiasMeanVarStrides; double epsilon = 1e-5; - std::array reduceDims{0, 1, 2}; + std::array reduceDims; }; template ()); - std::rotate(xyLengths.begin() + 1, xyLengths.begin() + 2, xyLengths.end()); - aligned_scaleBiasMeanVarStrides[0] = 0; - aligned_scaleBiasMeanVarStrides[1] = 0; - aligned_scaleBiasMeanVarStrides[2] = 0; - aligned_scaleBiasMeanVarStrides[3] = 1; + if(problem.IsLayoutNHWC()) + { + std::rotate(xyLengths.begin() + 1, xyLengths.begin() + 2, xyLengths.end()); + reduceDims = {0, 1, 2}; + aligned_scaleBiasMeanVarStrides[0] = 0; + aligned_scaleBiasMeanVarStrides[1] = 0; + aligned_scaleBiasMeanVarStrides[2] = 0; + aligned_scaleBiasMeanVarStrides[3] = 1; + } + else if(problem.IsLayoutNCHW()) + { + reduceDims = {0, 2, 3}; + aligned_scaleBiasMeanVarStrides[0] = 0; + aligned_scaleBiasMeanVarStrides[1] = 1; + aligned_scaleBiasMeanVarStrides[2] = 0; + aligned_scaleBiasMeanVarStrides[3] = 0; + } + else + { + MIOPEN_THROW(miopenStatusInternalError, + "BnCKFwdInference operation does not support this data layout"); + } } std::array xyLengths; @@ -91,8 +109,9 @@ struct CKArgsBNormFwd std::vector invariantDims; std::array aligned_scaleBiasMeanVarStrides{3}; + std::array arrScaleBiasMeanVarStrides; - std::array reduceDims{0, 1, 2}; + std::array reduceDims; template auto MakeArgPtr(const InvokerPtr& invoker_ptr, const InvokerParams& data_ctx) const @@ -305,7 +324,7 @@ bool BnCKFwdInference::IsApplicable( #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL if(env::disabled(MIOPEN_DEBUG_CK_BN_INFER)) return false; - if(!bn_problem.IsLayoutNHWC()) + if(!bn_problem.IsLayoutNHWC() && !bn_problem.IsLayoutNCHW()) return false; if(!ck_utility::is_ck_supported_hardware(context.GetStream())) return false; @@ -313,6 +332,10 @@ bool BnCKFwdInference::IsApplicable( return false; if(bn_problem.GetDirection() != miopen::batchnorm::Direction::ForwardInference) return false; + if(bn_problem.GetMode() != miopenBNSpatial) + return false; + if(bn_problem.GetXDesc().GetType() != bn_problem.GetScaleBiasDiffDesc().GetType()) + return false; switch(bn_problem.GetXDesc().GetType()) { @@ -330,29 +353,20 @@ bool BnCKFwdInference::IsApplicable( return false; } -template +template ConvSolution MakeAnyInvokerFactory(const miopen::batchnorm::ProblemDescription& problem, - InvokerFactoryMakerNHWC&& invoker_factory_maker_nhwc) + InvokerFactoryMaker&& invoker_factory_maker) { #if MIOPEN_BACKEND_HIP && MIOPEN_USE_COMPOSABLEKERNEL - if(problem.IsLayoutNHWC()) - { - switch(problem.GetXDesc().GetType()) - { - case miopenFloat: return invoker_factory_maker_nhwc(F32{}); - case miopenDouble: return invoker_factory_maker_nhwc(F64{}); - case miopenHalf: return invoker_factory_maker_nhwc(F16{}); - case miopenBFloat16: return invoker_factory_maker_nhwc(BF16{}); - default: - MIOPEN_THROW(miopenStatusInternalError, - "BnCKFwdInference operation does not support this data type"); - } - } - // Todo: problem.IsLayoutDefault() - else + switch(problem.GetXDesc().GetType()) { + case miopenFloat: return invoker_factory_maker(F32{}); + case miopenDouble: return invoker_factory_maker(F64{}); + case miopenHalf: return invoker_factory_maker(F16{}); + case miopenBFloat16: return invoker_factory_maker(BF16{}); + default: MIOPEN_THROW(miopenStatusInternalError, - "BnCKFwdInference operation does not support this data layout"); + "BnCKFwdInference operation does not support this data type"); } #else return {}; diff --git a/src/solver/batchnorm/forward_training_ck.cpp b/src/solver/batchnorm/forward_training_ck.cpp index cdaafe7b58..fec919cf79 100644 --- a/src/solver/batchnorm/forward_training_ck.cpp +++ b/src/solver/batchnorm/forward_training_ck.cpp @@ -86,7 +86,21 @@ struct CKArgsBNormFwdTraining // prep for CK std::sort(xyStrides.begin(), xyStrides.end(), std::greater<>()); - std::rotate(xyLengths.begin() + 1, xyLengths.begin() + 2, xyLengths.end()); + + if(problem.IsLayoutNHWC()) + { + std::rotate(xyLengths.begin() + 1, xyLengths.begin() + 2, xyLengths.end()); + reduceDims = {0, 1, 2}; + } + else if(problem.IsLayoutNCHW()) + { + reduceDims = {0, 2, 3}; + } + else + { + MIOPEN_THROW(miopenStatusInternalError, + "BnCKFwdTraining operation does not support this data layout"); + } } CKArgsBNormFwdTraining(const CKArgsBNormFwdTraining&) = default; @@ -131,7 +145,7 @@ struct CKArgsBNormFwdTraining std::array arrScaleBiasMeanVarLengths; std::array arrScaleBiasMeanVarStrides; - std::array reduceDims{0, 1, 2}; + std::array reduceDims; }; template batch_factor = 4; + this->tolerance = + 4e-3 / std::numeric_limits::epsilon(); // ck solver has tolerance of 4e-3 add(input, "input", get_3d_bn_spatial_input_tensor( @@ -1233,34 +1235,18 @@ struct batch_norm_3d_spatial_driver : test_driver miopen::DeriveBNTensorDescriptor(derivedBnDesc, input.desc, miopenBNSpatial); std::tie(ssn, ssc, ssd, ssh, ssw) = miopen::tien<5>(derivedBnDesc.GetLengths()); - if(input.desc.GetType() == miopenFloat) - { - scale = - tensor{ssn, ssc, ssd, ssh, ssw}.generate(tensor_elem_gen_integer{17}); - shift = - tensor{ssn, ssc, ssd, ssh, ssw}.generate(tensor_elem_gen_integer{17}); + scale = tensor{ssn, ssc, ssd, ssh, ssw}; + shift = tensor{ssn, ssc, ssd, ssh, ssw}; + const double Data_scale = 1e-4; - if(d * h * w < 3072) - { - std::cout << "Choosing smaller input values for low dims" << std::endl; - input = tensor{n, c, d, h, w}.generate(tensor_elem_gen_integer{7}); - } + for(std::size_t i = 0; i < scale.desc.GetElementSize(); i++) + { + scale[i] = prng::gen_descreet_uniform_sign(Data_scale, 100); + shift[i] = prng::gen_descreet_uniform_sign(Data_scale, 100); } - else + for(std::size_t i = 0; i < input.desc.GetElementSize(); i++) { - scale = tensor{ssn, ssc, ssd, ssh, ssw}; - shift = tensor{ssn, ssc, ssd, ssh, ssw}; - - const double Data_scale = 1e-4; - for(std::size_t i = 0; i < scale.desc.GetElementSize(); i++) - { - scale[i] = prng::gen_descreet_uniform_sign(Data_scale, 100); - shift[i] = prng::gen_descreet_uniform_sign(Data_scale, 100); - } - for(std::size_t i = 0; i < input.desc.GetElementSize(); i++) - { - input[i] = prng::gen_descreet_uniform_sign(1e-5, 100); - } + input[i] = prng::gen_descreet_uniform_sign(1e-5, 100); } // train diff --git a/test/bn_spatial_test.cpp b/test/bn_spatial_test.cpp index 95a8ee099a..3db862c45b 100644 --- a/test/bn_spatial_test.cpp +++ b/test/bn_spatial_test.cpp @@ -1130,6 +1130,8 @@ struct batch_norm_spatial_driver : test_driver batch_norm_spatial_driver() { this->batch_factor = 4; + this->tolerance = + 4e-3 / std::numeric_limits::epsilon(); // ck solver has tolerance of 4e-3 add(input, "input", get_bn_spatial_input_tensor( @@ -1155,26 +1157,18 @@ struct batch_norm_spatial_driver : test_driver miopen::DeriveBNTensorDescriptor(derivedBnDesc, input.desc, miopenBNSpatial); std::tie(ssn, ssc, ssh, ssw) = miopen::tien<4>(derivedBnDesc.GetLengths()); - if(input.desc.GetType() == miopenFloat) + scale = tensor{ssn, ssc, ssh, ssw}; + shift = tensor{ssn, ssc, ssh, ssw}; + const double Data_scale = 1e-2; + + for(std::size_t i = 0; i < scale.desc.GetElementSize(); i++) { - scale = tensor{ssn, ssc, ssh, ssw}.generate(tensor_elem_gen_integer{17}); - shift = tensor{ssn, ssc, ssh, ssw}.generate(tensor_elem_gen_integer{17}); + scale[i] = prng::gen_descreet_uniform_sign(Data_scale, 100); + shift[i] = prng::gen_descreet_uniform_sign(Data_scale, 100); } - else + for(std::size_t i = 0; i < input.desc.GetElementSize(); i++) { - scale = tensor{ssn, ssc, ssh, ssw}; - shift = tensor{ssn, ssc, ssh, ssw}; - - const double Data_scale = 1e-4; - for(std::size_t i = 0; i < scale.desc.GetElementSize(); i++) - { - scale[i] = prng::gen_descreet_uniform_sign(Data_scale, 100); - shift[i] = prng::gen_descreet_uniform_sign(Data_scale, 100); - } - for(std::size_t i = 0; i < input.desc.GetElementSize(); i++) - { - input[i] = prng::gen_descreet_uniform_sign(1e-5, 100); - } + input[i] = prng::gen_descreet_uniform_sign(Data_scale, 100); } // train @@ -1193,6 +1187,7 @@ struct batch_norm_spatial_driver : test_driver // std::fill(input.begin(), input.end(), 1); // std::fill(scale.begin(), scale.end(), 1); // std::fill(shift.begin(), shift.end(), 1); + this->tolerance = 80 * input.desc.GetElementSize(); verify(verify_forward_infer_bn_spatial_recalc{input, scale, shift}); // inference use estimated running values diff --git a/test/gtest/bn.hpp b/test/gtest/bn.hpp index e1f192c37d..a195b6b81e 100644 --- a/test/gtest/bn.hpp +++ b/test/gtest/bn.hpp @@ -39,6 +39,44 @@ enum BNApiType testBNAPIV2, }; +// Assuming miopenTensorLayout_t and testAPI_t are the types of your enums +static std::string LayoutToString(int tensor_format) +{ + switch(tensor_format) + { + case miopenTensorNCHW: return "NCHW"; + case miopenTensorNHWC: return "NHWC"; + default: return "UnknownTensorFormat"; + } +} + +static std::string ApiVerisonToString(int api_version) +{ + switch(api_version) + { + case testBNAPIV1: return "testBNAPIV1"; + case testBNAPIV2: return "testBNAPIV2"; + default: return "UnknownAPIVersion"; + } +} + +// Custom test name generator to handle enums +struct TestNameGenerator +{ + std::string operator()( + const testing::TestParamInfo>& info) + const + { + const auto& layout_type = std::get<1>(info.param); + const auto& api_type = std::get<2>(info.param); + + std::string tensor_name = LayoutToString(layout_type); + std::string api_name = ApiVerisonToString(api_type); + + return tensor_name + "_" + api_name + "_" + std::to_string(info.index); + } +}; + template -// { -// }; - -// NHWC solver accepts -// XDataType : half_float::half -// YDataYype : half_float::half -// ScaleDataType : half_float::half -// BiasDataType : half_float::half -// MeanVarDataType : float -struct GPU_BN_V2_BwdNHWC_FP16 +struct GPU_BN_BWD_Small_FP16 + : BNBwdTest +{ +}; + +struct GPU_BN_BWD_Large_FP16 : BNBwdTest { }; @@ -55,94 +42,101 @@ struct GPU_BN_V2_BwdNHWC_FP16 // ScaleDataType : bfloat16 // BiasDataType : bfloat16 // MeanVarDataType : float -struct GPU_BN_V1_BwdNHWC_BFP16 : BNBwdTest +struct GPU_BN_BWD_Small_BFP16 : BNBwdTest { }; -struct GPU_BN_V2_BwdNHWC_BFP16 : BNBwdTest +struct GPU_BN_BWD_Large_BFP16 : BNBwdTest { }; -struct GPU_BN_V1_Bwd_FP32 : BNBwdTest +struct GPU_BN_BWD_Small_FP32 : BNBwdTest { }; -struct GPU_BN_V2_Bwd_FP32 : BNBwdTest +struct GPU_BN_BWD_Large_FP32 : BNBwdTest { }; -struct GPU_BN_V1_BwdNHWC_FP64 : BNBwdTest +struct GPU_BN_BWD_Small_FP64 : BNBwdTest { }; -struct GPU_BN_V2_BwdNHWC_FP64 : BNBwdTest +struct GPU_BN_BWD_Large_FP64 : BNBwdTest { }; // fp16 -// TEST_P(GPU_BN_V1_BwdNCHW_FP16, BnV1BwdHalf) {} -TEST_P(GPU_BN_V2_BwdNHWC_FP16, BnV2BwdCKHalf) {} +TEST_P(GPU_BN_BWD_Small_FP16, BnV1SmallBWDCKfp16) {} +TEST_P(GPU_BN_BWD_Large_FP16, BnV2LargeBWDCKfp16) {} -// float -TEST_P(GPU_BN_V1_Bwd_FP32, BnV1BwdFloat) {} -TEST_P(GPU_BN_V2_Bwd_FP32, BnV2BwdFloat) {} +// bfp16 +TEST_P(GPU_BN_BWD_Small_BFP16, BnV1SmallBWDCKbfp16) {} +TEST_P(GPU_BN_BWD_Large_BFP16, BnV2LargeBWDCKbfp16) {} -// bfp16 is only on CK solver -TEST_P(GPU_BN_V1_BwdNHWC_BFP16, BnV1BwdCKBfloat) {} -TEST_P(GPU_BN_V2_BwdNHWC_BFP16, BnV2BwdCKBfloat) {} +// fp32 (float) +TEST_P(GPU_BN_BWD_Small_FP32, BnV1SmallBWDCKfp32) {} +TEST_P(GPU_BN_BWD_Large_FP32, BnV2LargeBWDCKfp32) {} -// double is only on CK solver -TEST_P(GPU_BN_V1_BwdNHWC_FP64, BnV1BwdCKDouble) {} -TEST_P(GPU_BN_V2_BwdNHWC_FP64, BnV2BwdCKDouble) {} +// fp64 +TEST_P(GPU_BN_BWD_Small_FP64, BnV1SmallBWDCKfp64) {} +TEST_P(GPU_BN_BWD_Large_FP64, BnV2LargeBWDCKfp64) {} -// // fp16 -// INSTANTIATE_TEST_SUITE_P(Smoke, -// GPU_BN_V1_BwdNCHW_FP16, -// testing::Combine(testing::ValuesIn(NetworkSmall()), -// testing::Values(miopenTensorNCHW), -// testing::ValuesIn({testBNAPIV1}))); +// fp16 +INSTANTIATE_TEST_SUITE_P(Smoke, + GPU_BN_BWD_Small_FP16, + testing::Combine(testing::ValuesIn(NetworkSmall()), + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV1})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_BwdNHWC_FP16, + GPU_BN_BWD_Large_FP16, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); -// fp32 +// bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_Bwd_FP32, + GPU_BN_BWD_Small_BFP16, testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNCHW), - testing::ValuesIn({testBNAPIV1}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV1})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_Bwd_FP32, + GPU_BN_BWD_Large_BFP16, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); -// bfp16 is only on CK solver +// fp32 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_BwdNHWC_BFP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV1}))); + GPU_BN_BWD_Small_FP32, + testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_BwdNHWC_BFP16, + GPU_BN_BWD_Large_FP32, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); - -// fp64 is only on CK solver + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); +// fp64 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_BwdNHWC_FP64, - testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV1}))); + GPU_BN_BWD_Small_FP64, + testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_BwdNHWC_FP64, + GPU_BN_BWD_Large_FP64, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); diff --git a/test/gtest/bn_fwd_train.cpp b/test/gtest/bn_fwd_train.cpp index ed25631175..8cc60e2d55 100644 --- a/test/gtest/bn_fwd_train.cpp +++ b/test/gtest/bn_fwd_train.cpp @@ -26,119 +26,121 @@ #include "bn.hpp" -// ** OCL kernel for fwd training are failing gtest ** -// ** Hence, this gtest only tests CK solvers ** - -// NHWC solver accepts -// XDataType : half_float::half -// YDataYype : half_float::half -// ScaleDataType : half_float::half -// BiasDataType : half_float::half +// XDataType : half +// YDataYype : half +// ScaleDataType : half +// BiasDataType : half // MeanVarDataType : float -struct GPU_BN_V1_FwdTrainNHWC_FP16 +struct GPU_BN_FWD_Train_Small_FP16 : BNFwdTrainTest { }; -struct GPU_BN_V2_FwdTrainNHWC_FP16 +struct GPU_BN_FWD_Train_Large_FP16 : BNFwdTrainTest { }; -// bf16 NHWC solver accepts is only on CK solver // XDataType : bfloat16 // YDataYype : bfloat16 // ScaleDataType : bfloat16 // BiasDataType : bfloat16 // MeanVarDataType : float -struct GPU_BN_V1_FwdTrainNHWC_BFP16 : BNFwdTrainTest +struct GPU_BN_FWD_Train_Small_BFP16 : BNFwdTrainTest { }; -struct GPU_BN_V2_FwdTrainNHWC_BFP16 : BNFwdTrainTest +struct GPU_BN_FWD_Train_Large_BFP16 : BNFwdTrainTest { }; -struct GPU_BN_V1_FwdTrainNHWC_FP32 : BNFwdTrainTest +struct GPU_BN_FWD_Train_Small_FP32 : BNFwdTrainTest { }; -struct GPU_BN_V2_FwdTrainNHWC_FP32 : BNFwdTrainTest +struct GPU_BN_FWD_Train_Large_FP32 : BNFwdTrainTest { }; -struct GPU_BN_V1_FwdTrainNHWC_FP64 : BNFwdTrainTest +struct GPU_BN_FWD_Train_Small_FP64 : BNFwdTrainTest { }; -struct GPU_BN_V2_FwdTrainNHWC_FP64 : BNFwdTrainTest +struct GPU_BN_FWD_Train_Large_FP64 : BNFwdTrainTest { }; // fp16 -TEST_P(GPU_BN_V1_FwdTrainNHWC_FP16, BnV1FwdTrainHalf) {} -TEST_P(GPU_BN_V2_FwdTrainNHWC_FP16, BnV2FwdTrainCKHalf) {} - -// float -TEST_P(GPU_BN_V1_FwdTrainNHWC_FP32, BnV1FwdTrainFloat) {} -TEST_P(GPU_BN_V2_FwdTrainNHWC_FP32, BnV2FwdTrainFloat) {} +TEST_P(GPU_BN_FWD_Train_Small_FP16, BnV1SmallFWD_TrainCKfp16) {} +TEST_P(GPU_BN_FWD_Train_Large_FP16, BnV2LargeFWD_TrainCKfp16) {} // bfp16 -TEST_P(GPU_BN_V1_FwdTrainNHWC_BFP16, BnV1FwdTrainCKBfloat) {} -TEST_P(GPU_BN_V2_FwdTrainNHWC_BFP16, BnV2FwdTrainCKBfloat) {} +TEST_P(GPU_BN_FWD_Train_Small_BFP16, BnV1SmallFWD_TrainCKbfp16) {} +TEST_P(GPU_BN_FWD_Train_Large_BFP16, BnV2LargeFWD_TrainCKbfp16) {} + +// fp32 (float) +TEST_P(GPU_BN_FWD_Train_Small_FP32, BnV1SmallFWD_TrainCKfp32) {} +TEST_P(GPU_BN_FWD_Train_Large_FP32, BnV2LargeFWD_TrainCKfp32) {} -// double -TEST_P(GPU_BN_V1_FwdTrainNHWC_FP64, BnV1FwdTrainCKDouble) {} -TEST_P(GPU_BN_V2_FwdTrainNHWC_FP64, BnV2FwdTrainCKDouble) {} +// fp64 +TEST_P(GPU_BN_FWD_Train_Small_FP64, BnV1SmallFWD_TrainCKfp64) {} +TEST_P(GPU_BN_FWD_Train_Large_FP64, BnV2LargeFWD_TrainCKfp64) {} // fp16 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_FwdTrainNHWC_FP16, + GPU_BN_FWD_Train_Small_FP16, testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV1}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV1})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_FwdTrainNHWC_FP16, + GPU_BN_FWD_Train_Large_FP16, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); -// fp32 +// bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_FwdTrainNHWC_FP32, + GPU_BN_FWD_Train_Small_BFP16, testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV1}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV1})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_FwdTrainNHWC_FP32, + GPU_BN_FWD_Train_Large_BFP16, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); -// bfp16 +// fp32 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_FwdTrainNHWC_BFP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV1}))); + GPU_BN_FWD_Train_Small_FP32, + testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_FwdTrainNHWC_BFP16, + GPU_BN_FWD_Train_Large_FP32, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); - + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); // fp64 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_FwdTrainNHWC_FP64, - testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV1}))); + GPU_BN_FWD_Train_Small_FP64, + testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_FwdTrainNHWC_FP64, + GPU_BN_FWD_Train_Large_FP64, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); diff --git a/test/gtest/bn_infer.cpp b/test/gtest/bn_infer.cpp index aea15c097e..4f32a2b3bd 100644 --- a/test/gtest/bn_infer.cpp +++ b/test/gtest/bn_infer.cpp @@ -26,27 +26,18 @@ #include "bn.hpp" -// NCHW solver accepts +// NHWC solver accepts // XDataType : half_float::half // YDataYype : half_float::half // ScaleDataType : half_float::half // BiasDataType : half_float::half -// MeanVarDataType : half_float::half -struct GPU_BN_V1_InferNCHW_FP16 : BNInferTest +// MeanVarDataType : float +struct GPU_BN_Infer_Small_FP16 + : BNInferTest { }; -// NHWC solver accepts -// XDataType : half_float::half -// YDataYype : half_float::half -// ScaleDataType : half_float::half -// BiasDataType : half_float::half -// MeanVarDataType : float -struct GPU_BN_V2_InferNHWC_FP16 +struct GPU_BN_Infer_Large_FP16 : BNInferTest { }; @@ -57,94 +48,101 @@ struct GPU_BN_V2_InferNHWC_FP16 // ScaleDataType : bfloat16 // BiasDataType : bfloat16 // MeanVarDataType : float -struct GPU_BN_V1_InferNHWC_BFP16 : BNInferTest +struct GPU_BN_Infer_Small_BFP16 : BNInferTest { }; -struct GPU_BN_V2_InferNHWC_BFP16 : BNInferTest +struct GPU_BN_Infer_Large_BFP16 : BNInferTest { }; -struct GPU_BN_V1_Infer_FP32 : BNInferTest +struct GPU_BN_Infer_Small_FP32 : BNInferTest { }; -struct GPU_BN_V2_Infer_FP32 : BNInferTest +struct GPU_BN_Infer_Large_FP32 : BNInferTest { }; -struct GPU_BN_V1_InferNHWC_FP64 : BNInferTest +struct GPU_BN_Infer_Small_FP64 : BNInferTest { }; -struct GPU_BN_V2_InferNHWC_FP64 : BNInferTest +struct GPU_BN_Infer_Large_FP64 : BNInferTest { }; // fp16 -TEST_P(GPU_BN_V1_InferNCHW_FP16, BnV1InferHalf) {} -TEST_P(GPU_BN_V2_InferNHWC_FP16, BnV2InferCKHalf) {} +TEST_P(GPU_BN_Infer_Small_FP16, BnV1SmallInferCKfp16) {} +TEST_P(GPU_BN_Infer_Large_FP16, BnV2LargeInferCKfp16) {} -// float -TEST_P(GPU_BN_V1_Infer_FP32, BnV1InferFloat) {} -TEST_P(GPU_BN_V2_Infer_FP32, BnV2InferFloat) {} +// bfp16 +TEST_P(GPU_BN_Infer_Small_BFP16, BnV1SmallInferCKbfp16) {} +TEST_P(GPU_BN_Infer_Large_BFP16, BnV2LargeInferCKbfp16) {} -// bfp16 is only on CK solver -TEST_P(GPU_BN_V1_InferNHWC_BFP16, BnV1InferCKBfloat) {} -TEST_P(GPU_BN_V2_InferNHWC_BFP16, BnV2InferCKBfloat) {} +// fp32 (float) +TEST_P(GPU_BN_Infer_Small_FP32, BnV1SmallInferCKfp32) {} +TEST_P(GPU_BN_Infer_Large_FP32, BnV2LargeInferCKfp32) {} -// double is only on CK solver -TEST_P(GPU_BN_V1_InferNHWC_FP64, BnV1InferCKDouble) {} -TEST_P(GPU_BN_V2_InferNHWC_FP64, BnV2InferCKDouble) {} +// fp64 +TEST_P(GPU_BN_Infer_Small_FP64, BnV1SmallInferCKfp64) {} +TEST_P(GPU_BN_Infer_Large_FP64, BnV2LargeInferCKfp64) {} // fp16 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_InferNCHW_FP16, + GPU_BN_Infer_Small_FP16, testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNCHW), - testing::ValuesIn({testBNAPIV1}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV1})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_InferNHWC_FP16, + GPU_BN_Infer_Large_FP16, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); -// fp32 +// bfp16 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_Infer_FP32, + GPU_BN_Infer_Small_BFP16, testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNCHW), - testing::ValuesIn({testBNAPIV1}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV1})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_Infer_FP32, + GPU_BN_Infer_Large_BFP16, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); -// bfp16 is only on CK solver +// fp32 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_InferNHWC_BFP16, - testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV1}))); + GPU_BN_Infer_Small_FP32, + testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_InferNHWC_BFP16, + GPU_BN_Infer_Large_FP32, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); - -// fp64 is only on CK solver + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); +// fp64 INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V1_InferNHWC_FP64, - testing::Combine(testing::ValuesIn(NetworkSmall()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV1}))); + GPU_BN_Infer_Small_FP64, + testing::Combine(testing::ValuesIn(NetworkLarge()), + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator()); INSTANTIATE_TEST_SUITE_P(Smoke, - GPU_BN_V2_InferNHWC_FP64, + GPU_BN_Infer_Large_FP64, testing::Combine(testing::ValuesIn(NetworkLarge()), - testing::Values(miopenTensorNHWC), - testing::ValuesIn({testBNAPIV2}))); + testing::ValuesIn({miopenTensorNCHW, miopenTensorNHWC}), + testing::ValuesIn({testBNAPIV2})), + TestNameGenerator());