Skip to content

Commit

Permalink
Support other integer types for SubstringUTF8 & RightUTF8 functions (#…
Browse files Browse the repository at this point in the history
…9507) (#9513)

close #9473

Support other integer types for SubstringUTF8 & RightUTF8 functions

Signed-off-by: gengliqi <gengliqiii@gmail.com>

Co-authored-by: Liqi Geng <gengliqiii@gmail.com>
Co-authored-by: gengliqi <gengliqiii@gmail.com>
  • Loading branch information
ti-chi-bot and gengliqi authored Nov 5, 2024
1 parent 826c5ce commit ba9fab8
Show file tree
Hide file tree
Showing 6 changed files with 565 additions and 292 deletions.
485 changes: 282 additions & 203 deletions dbms/src/Functions/FunctionsString.cpp

Large diffs are not rendered by default.

58 changes: 21 additions & 37 deletions dbms/src/Functions/tests/gtest_string_left.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,61 +66,55 @@ class StringLeftTest : public DB::tests::FunctionTest
for (bool is_length_const : is_consts)
inner_test(is_str_const, is_length_const);
}

/// Verifies that executing `func_name` with a length argument of type `Integer`
/// raises an Exception. Every combination of constant / non-constant string and
/// length columns is exercised. Only integer types other than Int64 and UInt64
/// may be used here (enforced at compile time).
template <typename Integer>
void testInvalidLengthType()
{
    static_assert(!(std::is_same_v<Integer, Int64> || std::is_same_v<Integer, UInt64>));

    // The columns are built inside ASSERT_THROW on purpose: anything thrown while
    // constructing them is covered by the same expectation as the function call.
    const auto expect_throw = [&](bool const_str, bool const_len) {
        ASSERT_THROW(
            executeFunction(
                func_name,
                const_str ? createConstColumn<Nullable<String>>(1, "") : createColumn<Nullable<String>>({""}),
                const_len ? createConstColumn<Nullable<Integer>>(1, 0) : createColumn<Nullable<Integer>>({0})),
            Exception);
    };

    const std::vector<bool> const_flags{true, false};
    for (const bool const_str : const_flags)
    {
        for (const bool const_len : const_flags)
            expect_throw(const_str, const_len);
    }
}
};

// Runs the templated testBoundary() helper once for each length type:
// Int8, Int16, Int32, Int64 and their unsigned counterparts.
// NOTE(review): CATCH is a project macro defined elsewhere; it presumably closes
// the function-level try block opened below — confirm against its definition.
TEST_F(StringLeftTest, testBoundary)
try
{
testBoundary<Int8>();
testBoundary<Int16>();
testBoundary<Int32>();
testBoundary<Int64>();
testBoundary<UInt8>();
testBoundary<UInt16>();
testBoundary<UInt32>();
testBoundary<UInt64>();
}
CATCH

TEST_F(StringLeftTest, testMoreCases)
try
{
#define CALL(A, B, C) \
test<Int8>(A, B, C); \
test<Int16>(A, B, C); \
test<Int32>(A, B, C); \
test<Int64>(A, B, C); \
test<UInt8>(A, B, C); \
test<UInt16>(A, B, C); \
test<UInt32>(A, B, C); \
test<UInt64>(A, B, C);

// test big string
// big_string.size() > length
String big_string;
// unit_string length = 22
String unit_string = "big string is 我!!!!!!!";
for (size_t i = 0; i < 1000; ++i)
big_string += unit_string;
test<Int64>(big_string, 22, unit_string);
test<UInt64>(big_string, 22, unit_string);
CALL(big_string, 22, unit_string);

// test origin_str.size() == length
String origin_str = "我的 size = 12";
test<Int64>(origin_str, 12, origin_str);
test<UInt64>(origin_str, 12, origin_str);
CALL(origin_str, 12, origin_str);

// test origin_str.size() < length
test<Int64>(origin_str, 22, origin_str);
test<UInt64>(origin_str, 22, origin_str);
CALL(origin_str, 22, origin_str);

// Mixed language
String english_str = "This is English";
String mixed_language_str = english_str + ",这是中文,C'est français,これが日本の";
test<Int64>(mixed_language_str, english_str.size(), english_str);
test<UInt64>(mixed_language_str, english_str.size(), english_str);
CALL(mixed_language_str, english_str.size(), english_str);

// column size != 1
// case 1
Expand All @@ -144,18 +138,8 @@ try
func_name,
createConstColumn<Nullable<String>>(8, second_case_string),
createColumn<Nullable<Int64>>({0, 1, 0, 1, 0, 0, 1, 1})));
}
CATCH

TEST_F(StringLeftTest, testInvalidLengthType)
try
{
testInvalidLengthType<Int8>();
testInvalidLengthType<Int16>();
testInvalidLengthType<Int32>();
testInvalidLengthType<UInt8>();
testInvalidLengthType<UInt16>();
testInvalidLengthType<UInt32>();
#undef CALL
}
CATCH

Expand Down
58 changes: 21 additions & 37 deletions dbms/src/Functions/tests/gtest_strings_right.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -65,61 +65,55 @@ class StringRightTest : public DB::tests::FunctionTest
for (bool is_length_const : is_consts)
inner_test(is_str_const, is_length_const);
}

/// Verifies that executing `func_name` with a length argument of type `Integer`
/// raises an Exception. Every combination of constant / non-constant string and
/// length columns is exercised. Only integer types other than Int64 and UInt64
/// may be used here (enforced at compile time).
template <typename Integer>
void testInvalidLengthType()
{
    static_assert(!(std::is_same_v<Integer, Int64> || std::is_same_v<Integer, UInt64>));

    // The columns are built inside ASSERT_THROW on purpose: anything thrown while
    // constructing them is covered by the same expectation as the function call.
    const auto expect_throw = [&](bool const_str, bool const_len) {
        ASSERT_THROW(
            executeFunction(
                func_name,
                const_str ? createConstColumn<Nullable<String>>(1, "") : createColumn<Nullable<String>>({""}),
                const_len ? createConstColumn<Nullable<Integer>>(1, 0) : createColumn<Nullable<Integer>>({0})),
            Exception);
    };

    const std::vector<bool> const_flags{true, false};
    for (const bool const_str : const_flags)
    {
        for (const bool const_len : const_flags)
            expect_throw(const_str, const_len);
    }
}
};

// Runs the templated testBoundary() helper once for each length type:
// Int8, Int16, Int32, Int64 and their unsigned counterparts.
// NOTE(review): CATCH is a project macro defined elsewhere; it presumably closes
// the function-level try block opened below — confirm against its definition.
TEST_F(StringRightTest, testBoundary)
try
{
testBoundary<Int8>();
testBoundary<Int16>();
testBoundary<Int32>();
testBoundary<Int64>();
testBoundary<UInt8>();
testBoundary<UInt16>();
testBoundary<UInt32>();
testBoundary<UInt64>();
}
CATCH

TEST_F(StringRightTest, testMoreCases)
try
{
#define CALL(A, B, C) \
test<Int8>(A, B, C); \
test<Int16>(A, B, C); \
test<Int32>(A, B, C); \
test<Int64>(A, B, C); \
test<UInt8>(A, B, C); \
test<UInt16>(A, B, C); \
test<UInt32>(A, B, C); \
test<UInt64>(A, B, C);

// test big string
// big_string.size() > length
String big_string;
// unit_string length = 22
String unit_string = "big string is 我!!!!!!!";
for (size_t i = 0; i < 1000; ++i)
big_string += unit_string;
test<Int64>(big_string, 22, unit_string);
test<UInt64>(big_string, 22, unit_string);
CALL(big_string, 22, unit_string);

// test origin_str.size() == length
String origin_str = "我的 size = 12";
test<Int64>(origin_str, 12, origin_str);
test<UInt64>(origin_str, 12, origin_str);
CALL(origin_str, 12, origin_str);

// test origin_str.size() < length
test<Int64>(origin_str, 22, origin_str);
test<UInt64>(origin_str, 22, origin_str);
CALL(origin_str, 22, origin_str);

// Mixed language
String english_str = "This is English";
String mixed_language_str = "这是中文,C'est français,これが日本の," + english_str;
test<Int64>(mixed_language_str, english_str.size(), english_str);
test<UInt64>(mixed_language_str, english_str.size(), english_str);
CALL(mixed_language_str, english_str.size(), english_str);

// column size != 1
// case 1
Expand All @@ -143,18 +137,8 @@ try
func_name,
createConstColumn<Nullable<String>>(8, second_case_string),
createColumn<Nullable<Int64>>({0, 1, 0, 1, 0, 0, 1, 1})));
}
CATCH

TEST_F(StringRightTest, testInvalidLengthType)
try
{
testInvalidLengthType<Int8>();
testInvalidLengthType<Int16>();
testInvalidLengthType<Int32>();
testInvalidLengthType<UInt8>();
testInvalidLengthType<UInt16>();
testInvalidLengthType<UInt32>();
#undef CALL
}
CATCH

Expand Down
163 changes: 153 additions & 10 deletions dbms/src/Functions/tests/gtest_substring.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,160 @@ class SubString : public DB::tests::FunctionTest
{
};

/// substringUTF8 check for position type T1 and length type T2 where both columns
/// may contain NULL (`{}`) entries and signed values. The rows cover negative
/// positions (including one far out of range), position 0, a negative length,
/// and a NULL in each of the three arguments.
template <typename T1, typename T2>
class TestNullableSigned
{
public:
    static void run(SubString & sub_string)
    {
        const auto input_strings = createColumn<Nullable<String>>(
            {"www.pingcap.com",
             "ww.pingcap.com",
             "w.pingcap.com",
             ".pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             {},
             "pingcap",
             "pingcap"});
        const auto positions = createColumn<T1>({-5, 1, 3, -3, 8, 2, -100, 0, 2, {}, -3});
        const auto lengths = createColumn<T2>({4, 4, 7, 4, 5, -5, 2, 3, 6, 4, {}});

        // A NULL in any argument yields a NULL result row.
        const auto expected
            = createColumn<Nullable<String>>({"p.co", "ww.p", "pingcap", "com", ".com", "", "", "", {}, {}, {}});

        const auto actual = sub_string.executeFunction("substringUTF8", input_strings, positions, lengths);
        ASSERT_COLUMN_EQ(expected, actual);
    }
};

/// substringUTF8 check for position type T1 and length type T2 using signed values
/// with no NULL positions or lengths. Only the string column carries a NULL row,
/// which is expected to produce a NULL result.
template <typename T1, typename T2>
class TestSigned
{
public:
    static void run(SubString & sub_string)
    {
        const auto input_strings = createColumn<Nullable<String>>(
            {"www.pingcap.com",
             "ww.pingcap.com",
             "w.pingcap.com",
             ".pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             {}});
        const auto positions = createColumn<T1>({-5, 1, 3, -3, 8, 2, -100, 0, 2});
        const auto lengths = createColumn<T2>({4, 4, 7, 4, 5, -5, 2, 3, 6});

        const auto expected = createColumn<Nullable<String>>({"p.co", "ww.p", "pingcap", "com", ".com", "", "", "", {}});

        const auto actual = sub_string.executeFunction("substringUTF8", input_strings, positions, lengths);
        ASSERT_COLUMN_EQ(expected, actual);
    }
};

/// substringUTF8 check for position type T1 and length type T2 using only
/// non-negative values, so the same data is valid for unsigned types. Position and
/// length columns may contain NULL (`{}`) entries; the rows also cover a zero
/// length, position 0, and a NULL in each of the three arguments.
template <typename T1, typename T2>
class TestNullableUnsigned
{
public:
    static void run(SubString & sub_string)
    {
        const auto input_strings = createColumn<Nullable<String>>(
            {"www.pingcap.com",
             "ww.pingcap.com",
             "w.pingcap.com",
             ".pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             {},
             "pingcap",
             "pingcap"});
        const auto positions = createColumn<T1>({11, 1, 3, 10, 8, 2, 0, 9, {}, 7});
        const auto lengths = createColumn<T2>({4, 4, 7, 4, 5, 0, 3, 6, 1, {}});

        // A NULL in any argument yields a NULL result row.
        const auto expected
            = createColumn<Nullable<String>>({"p.co", "ww.p", "pingcap", "com", ".com", "", "", {}, {}, {}});

        const auto actual = sub_string.executeFunction("substringUTF8", input_strings, positions, lengths);
        ASSERT_COLUMN_EQ(expected, actual);
    }
};

/// substringUTF8 check for position type T1 and length type T2 using only
/// non-negative values and no NULL positions or lengths. Only the string column
/// carries a NULL row, which is expected to produce a NULL result.
template <typename T1, typename T2>
class TestUnsigned
{
public:
    static void run(SubString & sub_string)
    {
        const auto input_strings = createColumn<Nullable<String>>(
            {"www.pingcap.com",
             "ww.pingcap.com",
             "w.pingcap.com",
             ".pingcap.com",
             "pingcap.com",
             "pingcap.com",
             "pingcap.com",
             {}});
        const auto positions = createColumn<T1>({11, 1, 3, 10, 8, 2, 0, 2});
        const auto lengths = createColumn<T2>({4, 4, 7, 4, 5, 0, 3, 1});

        const auto expected = createColumn<Nullable<String>>({"p.co", "ww.p", "pingcap", "com", ".com", "", "", {}});

        const auto actual = sub_string.executeFunction("substringUTF8", input_strings, positions, lengths);
        ASSERT_COLUMN_EQ(expected, actual);
    }
};

/// substringUTF8 check where the position is a constant column of type T1
/// (value 1 for all four rows) and the length is a regular column of type T2.
template <typename T1, typename T2>
class TestConstPos
{
public:
    static void run(SubString & sub_string)
    {
        const auto input_strings
            = createColumn<Nullable<String>>({"www.pingcap.com", "ww.pingcap.com", "w.pingcap.com", ".pingcap.com"});
        const auto const_position = createConstColumn<T1>(4, 1);
        const auto lengths = createColumn<T2>({1, 2, 3, 4});

        // Starting at the first character, row i keeps its first i characters.
        const auto expected = createColumn<Nullable<String>>({"w", "ww", "w.p", ".pin"});

        const auto actual = sub_string.executeFunction("substringUTF8", input_strings, const_position, lengths);
        ASSERT_COLUMN_EQ(expected, actual);
    }
};

/// substringUTF8 check where the position is a regular column of type T1 and the
/// length is a constant column of type T2 (value 4 for all four rows).
///
/// The constant length column is built with T2. It was previously built with T1,
/// which left T2 unused, so the const-length path was only ever exercised with
/// position and length of the same type.
template <typename T1, typename T2>
class TestConstLength
{
public:
    static void run(SubString & sub_string)
    {
        ASSERT_COLUMN_EQ(
            createColumn<Nullable<String>>({"www.", "w.pi", "ping", "ngca"}),
            sub_string.executeFunction(
                "substringUTF8",
                createColumn<Nullable<String>>({"www.pingcap.com", "ww.pingcap.com", "w.pingcap.com", ".pingcap.com"}),
                createColumn<T1>({1, 2, 3, 4}),
                // Length uses T2 so every (T1, T2) pairing supplied by the caller is tested.
                createConstColumn<T2>(4, 4)));
    }
};

TEST_F(SubString, subStringUTF8Test)
try
{
TestTypePair<TestNullableIntTypes, TestNullableIntTypes, TestNullableSigned, SubString>::run(*this);
TestTypePair<TestAllIntTypes, TestAllIntTypes, TestSigned, SubString>::run(*this);

TestTypePair<TestNullableIntTypes, TestNullableUIntTypes, TestNullableUnsigned, SubString>::run(*this);
TestTypePair<TestNullableUIntTypes, TestNullableIntTypes, TestNullableUnsigned, SubString>::run(*this);
TestTypePair<TestNullableUIntTypes, TestNullableUIntTypes, TestNullableUnsigned, SubString>::run(*this);

TestTypePair<TestAllIntTypes, TestAllUIntTypes, TestUnsigned, SubString>::run(*this);
TestTypePair<TestAllUIntTypes, TestAllIntTypes, TestUnsigned, SubString>::run(*this);
TestTypePair<TestAllUIntTypes, TestAllUIntTypes, TestUnsigned, SubString>::run(*this);

TestTypePair<TestAllIntTypes, TestAllIntTypes, TestConstPos, SubString>::run(*this);
TestTypePair<TestAllUIntTypes, TestAllUIntTypes, TestConstPos, SubString>::run(*this);

TestTypePair<TestAllIntTypes, TestAllIntTypes, TestConstLength, SubString>::run(*this);
TestTypePair<TestAllUIntTypes, TestAllUIntTypes, TestConstLength, SubString>::run(*this);

// column, const, const
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({"www.", "ww.p", "w.pi", ".pin"}),
Expand All @@ -38,6 +189,7 @@ try
createColumn<Nullable<String>>({"www.pingcap.com", "ww.pingcap.com", "w.pingcap.com", ".pingcap.com"}),
createConstColumn<Nullable<Int64>>(4, 1),
createConstColumn<Nullable<Int64>>(4, 4)));

// const, const, const
ASSERT_COLUMN_EQ(
createConstColumn<String>(1, "www."),
Expand All @@ -46,17 +198,8 @@ try
createConstColumn<Nullable<String>>(1, "www.pingcap.com"),
createConstColumn<Nullable<Int64>>(1, 1),
createConstColumn<Nullable<Int64>>(1, 4)));
// Test Null
ASSERT_COLUMN_EQ(
createColumn<Nullable<String>>({{}, "www."}),
executeFunction(
"substringUTF8",
createColumn<Nullable<String>>(
{{}, "www.pingcap.com"}),
createConstColumn<Nullable<Int64>>(2, 1),
createConstColumn<Nullable<Int64>>(2, 4)));
}
CATCH

} // namespace tests
} // namespace DB
} // namespace DB
Loading

0 comments on commit ba9fab8

Please sign in to comment.