Skip to content

Commit

Permalink
ARROW-12567: [C++][Gandiva] Implement ILIKE SQL function
Browse files: browse the repository at this point in the history
Closes apache#10179 from jvictorhuguenin/feature/implement-sql-ilike and squashes the following commits:

f160880 <frank400> Optimize holder constructor call
97e6e2d <frank400> Remove unnecessary Make method
c2363b1 <frank400> Disable TryOptimize for ilike
a484149 <frank400> Fix checkstyle on cmake file
c6a8372 <frank400> Delete unnecessary holder
4be6cc6 <frank400> Fix redefined function
b78085a <frank400> Fix miss include
2efd43e <frank400> Implement ilike function

Authored-by: frank400 <j.victorhuguenin2018@gmail.com>
Signed-off-by: Praveen <praveen@dremio.com>
(cherry picked from commit 0072c67)
  • Loading branch information
jvictorhuguenin committed Sep 16, 2021
1 parent 121cf9d commit e30f6ca
Show file tree
Hide file tree
Showing 7 changed files with 123 additions and 4 deletions.
1 change: 1 addition & 0 deletions cpp/src/gandiva/function_holder_registry.h
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ class FunctionHolderRegistry {
static map_type& makers() {
static map_type maker_map = {
{"like", LAMBDA_MAKER(LikeHolder)},
{"ilike", LAMBDA_MAKER(LikeHolder)},
{"to_date", LAMBDA_MAKER(ToDateHolder)},
{"random", LAMBDA_MAKER(RandomGeneratorHolder)},
{"rand", LAMBDA_MAKER(RandomGeneratorHolder)},
Expand Down
4 changes: 4 additions & 0 deletions cpp/src/gandiva/function_registry_string.cc
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,10 @@ std::vector<NativeFunction> GetStringFunctionRegistry() {
kResultNullIfNull, "gdv_fn_like_utf8_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),

NativeFunction("ilike", {}, DataTypeVector{utf8(), utf8()}, boolean(),
kResultNullIfNull, "gdv_fn_ilike_utf8_utf8",
NativeFunction::kNeedsFunctionHolder),

NativeFunction("ltrim", {}, DataTypeVector{utf8(), utf8()}, utf8(),
kResultNullIfNull, "ltrim_utf8_utf8", NativeFunction::kNeedsContext),

Expand Down
17 changes: 17 additions & 0 deletions cpp/src/gandiva/gdv_function_stubs.cc
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,12 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
return (*holder)(std::string(data, data_len));
}

bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len) {
gandiva::LikeHolder* holder = reinterpret_cast<gandiva::LikeHolder*>(ptr);
return (*holder)(std::string(data, data_len));
}

double gdv_fn_random(int64_t ptr) {
gandiva::RandomGeneratorHolder* holder =
reinterpret_cast<gandiva::RandomGeneratorHolder*>(ptr);
Expand Down Expand Up @@ -807,6 +813,17 @@ void ExportedStubFunctions::AddMappings(Engine* engine) const {
types->i1_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_like_utf8_utf8_utf8));

// gdv_fn_ilike_utf8_utf8
args = {types->i64_type(), // int64_t ptr
types->i8_ptr_type(), // const char* data
types->i32_type(), // int data_len
types->i8_ptr_type(), // const char* pattern
types->i32_type()}; // int pattern_len

engine->AddGlobalMappingForFunc("gdv_fn_ilike_utf8_utf8",
types->i1_type() /*return_type*/, args,
reinterpret_cast<void*>(gdv_fn_ilike_utf8_utf8));

// gdv_fn_to_date_utf8_utf8
args = {types->i64_type(), // int64_t execution_context
types->i64_type(), // int64_t holder_ptr
Expand Down
3 changes: 3 additions & 0 deletions cpp/src/gandiva/gdv_function_stubs.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ bool gdv_fn_like_utf8_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len,
const char* escape_char, int escape_char_len);

// Stub for the "ilike" function. `ptr` is the address of a LikeHolder passed as
// an integer; the holder is applied to the `data_len` bytes at `data`.
// The definition does not read `pattern` / `pattern_len`.
bool gdv_fn_ilike_utf8_utf8(int64_t ptr, const char* data, int data_len,
const char* pattern, int pattern_len);

int64_t gdv_fn_to_date_utf8_utf8_int32(int64_t context, int64_t ptr, const char* data,
int data_len, bool in1_validity,
const char* pattern, int pattern_len,
Expand Down
21 changes: 21 additions & 0 deletions cpp/src/gandiva/like_holder.cc
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,13 @@ Status LikeHolder::Make(const FunctionNode& node, std::shared_ptr<LikeHolder>* h
!IsArrowStringLiteral(literal_type),
Status::Invalid(
"'like' function requires a string literal as the second parameter"));

RE2::Options regex_op;
if (node.descriptor()->name() == "ilike") {
regex_op.set_case_sensitive(false); // set case-insensitive for ilike function.

return Make(arrow::util::get<std::string>(literal->holder()), holder, regex_op);
}
if (node.children().size() == 2) {
return Make(arrow::util::get<std::string>(literal->holder()), holder);
} else {
Expand Down Expand Up @@ -132,4 +139,18 @@ Status LikeHolder::Make(const std::string& sql_pattern, const std::string& escap
return Status::OK();
}

// Builds a LikeHolder from a SQL LIKE pattern, compiling the regex with the
// caller-supplied RE2 options.
//
// sql_pattern: SQL LIKE pattern; translated to a PCRE pattern before compiling.
// holder:      out-parameter, assigned only when the regex compiles.
// regex_op:    RE2 options forwarded to the holder's regex.
//
// Returns the translation error if the pattern cannot be converted,
// Status::Invalid if RE2 rejects the translated pattern, Status::OK otherwise.
Status LikeHolder::Make(const std::string& sql_pattern,
                        std::shared_ptr<LikeHolder>* holder, RE2::Options regex_op) {
  std::string translated;
  ARROW_RETURN_NOT_OK(RegexUtil::SqlLikePatternToPcre(sql_pattern, translated));

  std::shared_ptr<LikeHolder> built(new LikeHolder(translated, regex_op));
  ARROW_RETURN_IF(!built->regex_.ok(),
                  Status::Invalid("Building RE2 pattern '", translated, "' failed"));

  *holder = built;
  return Status::OK();
}
} // namespace gandiva
6 changes: 6 additions & 0 deletions cpp/src/gandiva/like_holder.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
static Status Make(const std::string& sql_pattern, const std::string& escape_char,
std::shared_ptr<LikeHolder>* holder);

// Builds a holder for `sql_pattern` whose regex is compiled with the given RE2
// options. On success the new holder is stored in `*holder`.
static Status Make(const std::string& sql_pattern, std::shared_ptr<LikeHolder>* holder,
RE2::Options regex_op);

// Try and optimise a function node with a "like" pattern.
static const FunctionNode TryOptimize(const FunctionNode& node);

Expand All @@ -51,6 +54,9 @@ class GANDIVA_EXPORT LikeHolder : public FunctionHolder {
private:
// Stores the pattern text and compiles it into `regex_` without explicit RE2 options.
explicit LikeHolder(const std::string& pattern) : pattern_(pattern), regex_(pattern) {}

// Stores the pattern text and compiles it into `regex_` using the supplied
// RE2 options (e.g. case-insensitive matching).
LikeHolder(const std::string& pattern, RE2::Options regex_op)
: pattern_(pattern), regex_(pattern, regex_op) {}

std::string pattern_; // posix pattern string, to help debugging
RE2 regex_; // compiled regex for the pattern

Expand Down
75 changes: 71 additions & 4 deletions cpp/src/gandiva/like_holder_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ namespace gandiva {

class TestLikeHolder : public ::testing::Test {
public:
RE2::Options regex_op;
FunctionNode BuildLike(std::string pattern) {
auto field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
auto pattern_node =
Expand All @@ -48,7 +49,7 @@ class TestLikeHolder : public ::testing::Test {
TEST_F(TestLikeHolder, TestMatchAny) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("ab%", &like_holder);
auto status = LikeHolder::Make("ab%", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like = *like_holder;
Expand All @@ -63,7 +64,7 @@ TEST_F(TestLikeHolder, TestMatchAny) {
TEST_F(TestLikeHolder, TestMatchOne) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("ab_", &like_holder);
auto status = LikeHolder::Make("ab_", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like = *like_holder;
Expand All @@ -78,7 +79,7 @@ TEST_F(TestLikeHolder, TestMatchOne) {
TEST_F(TestLikeHolder, TestPcreSpecial) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make(".*ab_", &like_holder);
auto status = LikeHolder::Make(".*ab_", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like = *like_holder;
Expand All @@ -97,7 +98,7 @@ TEST_F(TestLikeHolder, TestRegexEscape) {
TEST_F(TestLikeHolder, TestDot) {
std::shared_ptr<LikeHolder> like_holder;

auto status = LikeHolder::Make("abc.", &like_holder);
auto status = LikeHolder::Make("abc.", &like_holder, regex_op);
EXPECT_EQ(status.ok(), true) << status.message();

auto& like = *like_holder;
Expand Down Expand Up @@ -211,4 +212,70 @@ TEST_F(TestLikeHolder, TestMultipleEscapeChar) {
auto status = LikeHolder::Make("ab\\_", "\\\\", &like_holder);
EXPECT_EQ(status.ok(), false) << status.message();
}
// Fixture for exercising LikeHolder with case-insensitive ("ilike") matching.
class TestILikeHolder : public ::testing::Test {
 public:
  // RE2 options shared by the tests; each test turns case sensitivity off.
  RE2::Options regex_op;

  // Builds an "ilike" function node over a utf8 field named "in" and a utf8
  // literal holding `pattern`, returning boolean.
  FunctionNode BuildILike(std::string pattern) {
    auto input_field = std::make_shared<FieldNode>(arrow::field("in", arrow::utf8()));
    auto literal_node =
        std::make_shared<LiteralNode>(arrow::utf8(), LiteralHolder(pattern), false);
    return FunctionNode("ilike", {input_field, literal_node}, arrow::boolean());
  }
};

// '%' matches any run of characters; the "ab" prefix must match regardless of case.
TEST_F(TestILikeHolder, TestMatchAny) {
  regex_op.set_case_sensitive(false);

  std::shared_ptr<LikeHolder> holder;
  auto status = LikeHolder::Make("ab%", &holder, regex_op);
  EXPECT_EQ(status.ok(), true) << status.message();

  auto& like = *holder;

  // Prefix present, in any letter case.
  EXPECT_TRUE(like("ab"));
  EXPECT_TRUE(like("aBc"));
  EXPECT_TRUE(like("ABCD"));

  // Prefix incomplete, or not at the start of the input.
  EXPECT_FALSE(like("a"));
  EXPECT_FALSE(like("cab"));
}

// '_' matches exactly one character; the "Ab" prefix is compared ignoring case.
TEST_F(TestILikeHolder, TestMatchOne) {
  regex_op.set_case_sensitive(false);

  std::shared_ptr<LikeHolder> holder;
  auto status = LikeHolder::Make("Ab_", &holder, regex_op);
  EXPECT_EQ(status.ok(), true) << status.message();

  auto& like = *holder;

  // Exactly three characters with the expected prefix.
  EXPECT_TRUE(like("abc"));
  EXPECT_TRUE(like("aBd"));

  // Too short, too long, or prefix not at the start.
  EXPECT_FALSE(like("A"));
  EXPECT_FALSE(like("Abcd"));
  EXPECT_FALSE(like("DaBc"));
}

// Regex metacharacters in the SQL pattern are matched literally, even with
// case-insensitive matching enabled.
TEST_F(TestILikeHolder, TestPcreSpecial) {
  regex_op.set_case_sensitive(false);

  std::shared_ptr<LikeHolder> holder;
  auto status = LikeHolder::Make(".*aB_", &holder, regex_op);
  EXPECT_EQ(status.ok(), true) << status.message();

  auto& like = *holder;
  EXPECT_TRUE(like(".*Abc"));  // . and * aren't special in sql regex
  EXPECT_FALSE(like("xxAbc"));
}

// A '.' in the SQL pattern is a literal dot, not a single-character wildcard,
// and the surrounding letters are compared ignoring case.
TEST_F(TestILikeHolder, TestDot) {
  std::shared_ptr<LikeHolder> like_holder;

  regex_op.set_case_sensitive(false);
  auto status = LikeHolder::Make("aBc.", &like_holder, regex_op);
  EXPECT_EQ(status.ok(), true) << status.message();

  auto& like = *like_holder;

  // Positive cases: without these the test would also pass for a holder that
  // never matches anything.
  EXPECT_TRUE(like("abc."));
  EXPECT_TRUE(like("ABC."));

  // The dot must not behave as a wildcard.
  EXPECT_FALSE(like("abcd"));
}

} // namespace gandiva

0 comments on commit e30f6ca

Please sign in to comment.