Gandiva: add a case-insensitive "ilike(utf8, utf8) -> boolean" function.

Summary of what this patch does:
  * function_holder_registry.h: maps "ilike" to the LikeHolder maker, so it
    shares the holder used by "like".
  * function_registry_string.cc: registers the NativeFunction "ilike" backed
    by the stub "gdv_fn_ilike_utf8_utf8" (kNeedsFunctionHolder).
  * gdv_function_stubs.{h,cc}: adds the stub and its global mapping on the
    engine. The stub only evaluates the holder against the data; the
    pattern/pattern_len arguments are not read inside the stub.
  * like_holder.{h,cc}: adds LikeHolder::Make(sql_pattern, holder, RE2::Options)
    plus a matching private constructor; Make(node, holder) turns off case
    sensitivity when the function name is "ilike".
  * like_holder_test.cc: existing tests pass a default RE2::Options; new
    TestILikeHolder cases exercise case-insensitive matching.

NOTE(review): the "ilike" branch sits after the existing literal check, so a
non-literal pattern is still reported with the message that names 'like'.
NOTE(review): TestILikeHolder::BuildILike is defined but not used by any test
added in this patch.

diff --git a/cpp/src/gandiva/function_holder_registry.h b/cpp/src/gandiva/function_holder_registry.h
index e1c5630e84191..225c73207fcc0 100644
--- a/cpp/src/gandiva/function_holder_registry.h
+++ b/cpp/src/gandiva/function_holder_registry.h
@@ -62,6 +62,7 @@ class FunctionHolderRegistry {
   static map_type& makers() {
     static map_type maker_map = {
         {"like", LAMBDA_MAKER(LikeHolder)},
+        {"ilike", LAMBDA_MAKER(LikeHolder)},
         {"to_date", LAMBDA_MAKER(ToDateHolder)},
         {"random", LAMBDA_MAKER(RandomGeneratorHolder)},
         {"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
diff --git a/cpp/src/gandiva/function_registry_string.cc b/cpp/src/gandiva/function_registry_string.cc
index 90e2231677357..7491e4435a9dc 100644
--- a/cpp/src/gandiva/function_registry_string.cc
+++ b/cpp/src/gandiva/function_registry_string.cc
@@ -131,6 +131,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
                      kResultNullIfNull, "gdv_fn_like_utf8_utf8_utf8",
                      NativeFunction::kNeedsFunctionHolder),
 
+      NativeFunction("ilike", {}, DataTypeVector{utf8(), utf8()}, boolean(),
+                     kResultNullIfNull, "gdv_fn_ilike_utf8_utf8",
+                     NativeFunction::kNeedsFunctionHolder),
+
       NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
                      kResultNullIfNull, "ltrim_utf8_utf8", NativeFunction::kNeedsContext),
 
diff --git a/cpp/src/gandiva/gdv_function_stubs.cc b/cpp/src/gandiva/gdv_function_stubs.cc
index 38c31a8c3f505..3c278049ed6fb 100644
--- a/cpp/src/gandiva/gdv_function_stubs.cc
+++ b/cpp/src/gandiva/gdv_function_stubs.cc
@@ -52,6 +52,12 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
   return (*holder)(std::string(data, data_len));
 }
 
+bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
+                            const char* pattern, int pattern_len) {
+  gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
+  return (*holder)(std::string(data, data_len));
+}
+
 double gdv_fn_random(int64_t ptr) {
   gandiva::RandomGeneratorHolder* holder =
       reinterpret_cast<gandiva::RandomGeneratorHolder*>(ptr);
@@ -807,6 +813,17 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
                                   types->i1_type() /*return_type*/, args,
                                   reinterpret_cast<void*>(gdv_fn_like_utf8_utf8_utf8));
 
+  // gdv_fn_ilike_utf8_utf8
+  args = {types->i64_type(),     // int64_t ptr
+          types->i8_ptr_type(),  // const char* data
+          types->i32_type(),     // int data_len
+          types->i8_ptr_type(),  // const char* pattern
+          types->i32_type()};    // int pattern_len
+
+  engine->AddGlobalMappingForFunc("gdv_fn_ilike_utf8_utf8",
+                                  types->i1_type() /*return_type*/, args,
+                                  reinterpret_cast<void*>(gdv_fn_ilike_utf8_utf8));
+
   // gdv_fn_to_date_utf8_utf8
   args = {types->i64_type(),     // int64_t execution_context
           types->i64_type(),     // int64_t holder_ptr
diff --git a/cpp/src/gandiva/gdv_function_stubs.h b/cpp/src/gandiva/gdv_function_stubs.h
index ee22c3f4ece14..043e94034ed7f 100644
--- a/cpp/src/gandiva/gdv_function_stubs.h
+++ b/cpp/src/gandiva/gdv_function_stubs.h
@@ -50,6 +50,9 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
                                 const char* pattern, int pattern_len,
                                 const char* escape_char, int escape_char_len);
 
+bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
+                            const char* pattern, int pattern_len);
+
 int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
                                        int data_len, bool in1_validity,
                                        const char* pattern, int pattern_len,
diff --git a/cpp/src/gandiva/like_holder.cc b/cpp/src/gandiva/like_holder.cc
index 5a3510e36528b..af9ac67d66ac4 100644
--- a/cpp/src/gandiva/like_holder.cc
+++ b/cpp/src/gandiva/like_holder.cc
@@ -80,6 +80,13 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h
       !IsArrowStringLiteral(literal_type),
       Status::Invalid(
           "'like' function requires a string literal as the second parameter"));
+
+  RE2::Options regex_op;
+  if (node.descriptor()->name() == "ilike") {
+    regex_op.set_case_sensitive(false);  // set case-insensitive for ilike function.
+
+    return Make(arrow::util::get<std::string>(literal->holder()), holder, regex_op);
+  }
   if (node.children().size() == 2) {
     return Make(arrow::util::get<std::string>(literal->holder()), holder);
   } else {
@@ -132,4 +139,18 @@ Status LikeHolder::Make(const std::string& sql_pattern, const std::string& escap
   return Status::OK();
 }
 
+Status LikeHolder::Make(const std::string& sql_pattern,
+                        std::shared_ptr<LikeHolder>* holder, RE2::Options regex_op) {
+  std::string pcre_pattern;
+  ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, pcre_pattern));
+
+  std::shared_ptr<LikeHolder> lholder;
+  lholder = std::shared_ptr<LikeHolder>(new LikeHolder(pcre_pattern, regex_op));
+
+  ARROW_RETURN_IF(!lholder->regex_.ok(),
+                  Status::Invalid("Building RE2 pattern '", pcre_pattern, "' failed"));
+
+  *holder = lholder;
+  return Status::OK();
+}
 }  // namespace gandiva
diff --git a/cpp/src/gandiva/like_holder.h b/cpp/src/gandiva/like_holder.h
index c7982e9143748..73e58017de19f 100644
--- a/cpp/src/gandiva/like_holder.h
+++ b/cpp/src/gandiva/like_holder.h
@@ -42,6 +42,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
   static Status Make(const std::string& sql_pattern, const std::string& escape_char,
                      std::shared_ptr<LikeHolder>* holder);
 
+  static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder,
+                     RE2::Options regex_op);
+
   // Try and optimise a function node with a "like" pattern.
   static const FunctionNode TryOptimize(const FunctionNode& node);
 
@@ -51,6 +54,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
  private:
   explicit LikeHolder(const std::string& pattern) : pattern_(pattern), regex_(pattern) {}
 
+  LikeHolder(const std::string& pattern, RE2::Options regex_op)
+      : pattern_(pattern), regex_(pattern, regex_op) {}
+
   std::string pattern_;  // posix pattern string, to help debugging
   RE2 regex_;            // compiled regex for the pattern
 
diff --git a/cpp/src/gandiva/like_holder_test.cc b/cpp/src/gandiva/like_holder_test.cc
index 18e585fc502d4..a52533a113836 100644
--- a/cpp/src/gandiva/like_holder_test.cc
+++ b/cpp/src/gandiva/like_holder_test.cc
@@ -27,6 +27,7 @@ namespace gandiva {
 
 class TestLikeHolder : public ::testing::Test {
  public:
+  RE2::Options regex_op;
   FunctionNode BuildLike(std::string pattern) {
     auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
     auto pattern_node =
@@ -48,7 +49,7 @@ class TestLikeHolder : public ::testing::Test {
 TEST_F(TestLikeHolder, TestMatchAny) {
   std::shared_ptr<LikeHolder> like_holder;
 
-  auto status = LikeHolder::Make("ab%", &like_holder);
+  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
   EXPECT_EQ(status.ok(), true) << status.message();
 
   auto& like = *like_holder;
@@ -63,7 +64,7 @@ TEST_F(TestLikeHolder, TestMatchAny) {
 TEST_F(TestLikeHolder, TestMatchOne) {
   std::shared_ptr<LikeHolder> like_holder;
 
-  auto status = LikeHolder::Make("ab_", &like_holder);
+  auto status = LikeHolder::Make("ab_", &like_holder, regex_op);
   EXPECT_EQ(status.ok(), true) << status.message();
 
   auto& like = *like_holder;
@@ -78,7 +79,7 @@ TEST_F(TestLikeHolder, TestMatchOne) {
 TEST_F(TestLikeHolder, TestPcreSpecial) {
   std::shared_ptr<LikeHolder> like_holder;
 
-  auto status = LikeHolder::Make(".*ab_", &like_holder);
+  auto status = LikeHolder::Make(".*ab_", &like_holder, regex_op);
   EXPECT_EQ(status.ok(), true) << status.message();
 
   auto& like = *like_holder;
@@ -97,7 +98,7 @@ TEST_F(TestLikeHolder, TestRegexEscape) {
 TEST_F(TestLikeHolder, TestDot) {
   std::shared_ptr<LikeHolder> like_holder;
 
-  auto status = LikeHolder::Make("abc.", &like_holder);
+  auto status = LikeHolder::Make("abc.", &like_holder, regex_op);
   EXPECT_EQ(status.ok(), true) << status.message();
 
   auto& like = *like_holder;
@@ -211,4 +212,70 @@ TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
   auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder);
   EXPECT_EQ(status.ok(), false) << status.message();
 }
+class TestILikeHolder : public ::testing::Test {
+ public:
+  RE2::Options regex_op;
+  FunctionNode BuildILike(std::string pattern) {
+    auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
+    auto pattern_node =
+        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
+    return FunctionNode("ilike", {field, pattern_node}, arrow::boolean());
+  }
+};
+
+TEST_F(TestILikeHolder, TestMatchAny) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like("ab"));
+  EXPECT_TRUE(like("aBc"));
+  EXPECT_TRUE(like("ABCD"));
+
+  EXPECT_FALSE(like("a"));
+  EXPECT_FALSE(like("cab"));
+}
+
+TEST_F(TestILikeHolder, TestMatchOne) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make("Ab_", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like("abc"));
+  EXPECT_TRUE(like("aBd"));
+
+  EXPECT_FALSE(like("A"));
+  EXPECT_FALSE(like("Abcd"));
+  EXPECT_FALSE(like("DaBc"));
+}
+
+TEST_F(TestILikeHolder, TestPcreSpecial) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make(".*aB_", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_TRUE(like(".*Abc"));  // . and * aren't special in sql regex
+  EXPECT_FALSE(like("xxAbc"));
+}
+
+TEST_F(TestILikeHolder, TestDot) {
+  std::shared_ptr<LikeHolder> like_holder;
+
+  regex_op.set_case_sensitive(false);
+  auto status = LikeHolder::Make("aBc.", &like_holder, regex_op);
+  EXPECT_EQ(status.ok(), true) << status.message();
+
+  auto& like = *like_holder;
+  EXPECT_FALSE(like("abcd"));
+}
+
 }  // namespace gandiva