From 6d7682c7f553ed65bde67bba5f69e8c495290e02 Mon Sep 17 00:00:00 2001
From: Andrew Kwangwoong Park
Date: Mon, 25 Mar 2024 14:11:56 +0900
Subject: [PATCH] [GPU] Extend pattern for ClampFP16Output (#23592)

### Details:
- In PR https://github.com/openvinotoolkit/openvino/pull/22245, the `clamp_fp16_output` optimization pass was moved to ngraph.
- Eltwise nodes (`Add`, `Subtract`, `Multiply`, `Divide`) that are fused into the target `gemm` node were not covered by the pattern, so the pattern is extended to match them as well (an illustrative sketch follows the patch).

### Tickets:
- 135060
---
 .../transformations/clamp_fp16_output.cpp |  9 +++-
 .../clamp_fp16_output_test.cpp            | 49 ++++++++++++++++++-
 2 files changed, 56 insertions(+), 2 deletions(-)

diff --git a/src/plugins/intel_gpu/src/plugin/transformations/clamp_fp16_output.cpp b/src/plugins/intel_gpu/src/plugin/transformations/clamp_fp16_output.cpp
index 941b5c51ec3a67..575604038c0deb 100644
--- a/src/plugins/intel_gpu/src/plugin/transformations/clamp_fp16_output.cpp
+++ b/src/plugins/intel_gpu/src/plugin/transformations/clamp_fp16_output.cpp
@@ -10,6 +10,10 @@
 #include "openvino/op/matmul.hpp"
 #include "openvino/op/softmax.hpp"
 #include "openvino/op/reshape.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/multiply.hpp"
+#include "openvino/op/subtract.hpp"
+#include "openvino/op/divide.hpp"
 #include "openvino/pass/pattern/op/pattern.hpp"
 #include "openvino/pass/pattern/op/wrap_type.hpp"
 #include "openvino/pass/pattern/op/or.hpp"
@@ -28,7 +32,10 @@ ClampFP16Output::ClampFP16Output() {
     auto in1 = any_input(as_value_predicate(class_other_than<ov::op::v0::Constant>()));
     auto matmul_m = wrap_type<ov::op::v0::MatMul>({in0, in1}, all_of({type_matches(ov::element::f16), consumers_count(1)}));
     auto reshape_m = wrap_type<ov::op::v1::Reshape>({matmul_m, any_input()}, all_of({type_matches(ov::element::f16), consumers_count(1)}));
-    auto softmax_input_m = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{reshape_m, matmul_m});
+    auto add_m = wrap_type<ov::op::v1::Add>({matmul_m, any_input()}, all_of({type_matches(ov::element::f16), consumers_count(1)}));
+    auto eltwise_m = wrap_type<ov::op::v1::Add, ov::op::v1::Subtract, ov::op::v1::Multiply, ov::op::v1::Divide>({matmul_m, any_input()},
+                                                                                                                all_of({type_matches(ov::element::f16), consumers_count(1)}));
+    auto softmax_input_m = std::make_shared<ov::pass::pattern::op::Or>(ov::OutputVector{eltwise_m, reshape_m, matmul_m});
     auto softmax_m = wrap_type<ov::op::v8::Softmax>({softmax_input_m}, type_matches(ov::element::f16));
 
     ov::matcher_pass_callback callback = [=](ov::pass::pattern::Matcher& m) {
diff --git a/src/plugins/intel_gpu/tests/unit/transformations/clamp_fp16_output_test.cpp b/src/plugins/intel_gpu/tests/unit/transformations/clamp_fp16_output_test.cpp
index 3973b7701108f5..5bdb492ea04e59 100644
--- a/src/plugins/intel_gpu/tests/unit/transformations/clamp_fp16_output_test.cpp
+++ b/src/plugins/intel_gpu/tests/unit/transformations/clamp_fp16_output_test.cpp
@@ -15,6 +15,8 @@
 #include
 #include "openvino/op/clamp.hpp"
 #include "openvino/op/reshape.hpp"
+#include "openvino/op/add.hpp"
+#include "openvino/op/maximum.hpp"
 #include
 #include
 #include
@@ -92,7 +94,6 @@ TEST_F(TransformationTestsF, ClampFp16OutputTest3) {
     comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
 }
 
-
 TEST_F(TransformationTestsF, ClampFp16OutputTest4) {
     {
         auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{ 3, 2, 2 });
@@ -108,3 +109,49 @@
     }
     comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
 }
+
+TEST_F(TransformationTestsF, ClampFp16OutputTest5) {
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{ 3, 2, 2 });
+        auto input2 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{ 1, 2, 2 });
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(input1, input2, true, false);
+        auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{ 3, 2, 2 });
+        auto add = std::make_shared<ov::op::v1::Add>(matmul, data);
+        auto softmax = std::make_shared<ov::op::v8::Softmax>(add, 1);
+
+        model = std::make_shared<ov::Model>(ov::NodeVector{ softmax }, ov::ParameterVector{ input1, input2, data });
+        manager.register_pass<ClampFP16Output>();
+    }
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{ 3, 2, 2 });
+        auto input2 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{ 1, 2, 2 });
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(input1, input2, true, false);
+        auto min = static_cast<double>(std::numeric_limits<ov::float16>::lowest());
+        auto max = static_cast<double>(std::numeric_limits<ov::float16>::max());
+        auto clamp = std::make_shared<ov::op::v0::Clamp>(matmul, min, max);
+        auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{ 3, 2, 2 });
+        auto add = std::make_shared<ov::op::v1::Add>(clamp, data);
+        auto softmax = std::make_shared<ov::op::v8::Softmax>(add, 1);
+
+        model_ref = std::make_shared<ov::Model>(ov::NodeVector{ softmax }, ov::ParameterVector{ input1, input2, data });
+    }
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
+
+TEST_F(TransformationTestsF, ClampFp16OutputTest6) {
+    {
+        auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{ 3, 2, 2 });
+        auto input2 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{ 1, 2, 2 });
+        auto matmul = std::make_shared<ov::op::v0::MatMul>(input1, input2, true, false);
+        auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{ 3, 2, 2 });
+        auto maximum = std::make_shared<ov::op::v1::Maximum>(matmul, data);
+        auto softmax = std::make_shared<ov::op::v8::Softmax>(maximum, 1);
+
+        model = std::make_shared<ov::Model>(ov::NodeVector{ softmax }, ov::ParameterVector{ input1, input2, data });
+        manager.register_pass<ClampFP16Output>();
+    }
+    {
+        model_ref = model->clone(); // Not changed: Maximum is not one of the eltwise types fused into gemm
+    }
+    comparator.enable(FunctionsComparator::CmpValues::ATTRIBUTES);
+}
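Illustrative sketch (not part of the patch): a minimal, self-contained program mirroring `ClampFp16OutputTest5` above. It builds the f16 `MatMul` -> `Add` -> `Softmax` subgraph (the eltwise case the extended pattern now covers), runs only the `ClampFP16Output` pass, and counts the `Clamp` nodes inserted after the `MatMul`. The `ov::intel_gpu` namespace and the `plugin/transformations/clamp_fp16_output.hpp` include path are assumptions taken from the unit test file.

```cpp
// Minimal sketch, assuming the ov::intel_gpu namespace and the internal
// plugin header path used by the unit tests; not part of the patch.
#include <memory>

#include "openvino/core/model.hpp"
#include "openvino/core/type.hpp"
#include "openvino/op/add.hpp"
#include "openvino/op/clamp.hpp"
#include "openvino/op/matmul.hpp"
#include "openvino/op/parameter.hpp"
#include "openvino/op/softmax.hpp"
#include "openvino/pass/manager.hpp"
#include "plugin/transformations/clamp_fp16_output.hpp"  // intel_gpu internal header (assumed path)

int main() {
    // f16 MatMul -> Add -> Softmax: the Add is an eltwise that the GPU plugin
    // fuses into the gemm, which is the case the extended pattern now matches.
    auto input1 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{3, 2, 2});
    auto input2 = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{1, 2, 2});
    auto matmul = std::make_shared<ov::op::v0::MatMul>(input1, input2, true, false);
    auto data = std::make_shared<ov::op::v0::Parameter>(ov::element::f16, ov::Shape{3, 2, 2});
    auto add = std::make_shared<ov::op::v1::Add>(matmul, data);
    auto softmax = std::make_shared<ov::op::v8::Softmax>(add, 1);
    auto model = std::make_shared<ov::Model>(ov::NodeVector{softmax},
                                             ov::ParameterVector{input1, input2, data});

    // Run only the ClampFP16Output pass, as the unit tests do.
    ov::pass::Manager manager;
    manager.register_pass<ov::intel_gpu::ClampFP16Output>();
    manager.run_passes(model);

    // After the pass, a Clamp bounded to the fp16 range should sit between MatMul and Add.
    size_t clamp_count = 0;
    for (const auto& node : model->get_ordered_ops()) {
        if (ov::is_type<ov::op::v0::Clamp>(node))
            ++clamp_count;
    }
    return clamp_count == 1 ? 0 : 1;  // expect exactly one inserted Clamp for this topology
}
```

The check is intentionally the same shape as the reference model in `ClampFp16OutputTest5`: the pass only rewrites the graph, so verifying it reduces to confirming the inserted `Clamp` with fp16 lower/upper bounds between the `MatMul` and the fused eltwise.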