diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
index fe83826bd51..32817b1d72e 100644
--- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp
+++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp
@@ -622,8 +622,8 @@ const std::unordered_map scalar_func_map({
     //{tipb::ScalarFuncSig::Format, "cast"},
     //{tipb::ScalarFuncSig::FormatWithLocale, "cast"},
     //{tipb::ScalarFuncSig::FromBase64, "cast"},
-    //{tipb::ScalarFuncSig::HexIntArg, "cast"},
-    //{tipb::ScalarFuncSig::HexStrArg, "cast"},
+    {tipb::ScalarFuncSig::HexIntArg, "hexInt"},
+    {tipb::ScalarFuncSig::HexStrArg, "hexStr"},
     //{tipb::ScalarFuncSig::InsertUTF8, "cast"},
     //{tipb::ScalarFuncSig::Insert, "cast"},
     //{tipb::ScalarFuncSig::InstrUTF8, "cast"},
diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp
index 969d28de7d8..a8727553bd4 100644
--- a/dbms/src/Functions/FunctionsString.cpp
+++ b/dbms/src/Functions/FunctionsString.cpp
@@ -4083,7 +4083,7 @@ class PadUTF8Impl : public IFunction
             break;
         default:
             throw Exception(fmt::format("the second argument type of {} is invalid, expect integer, got {}", getName(), type_index), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
-        };
+        }
     }
 };
 
@@ -5120,6 +5120,208 @@ class FunctionFormatWithLocale : public IFunction
     }
 };
 
+/// Implements `hexStr`: encodes every byte of a String or FixedString argument as two
+/// upper-case hexadecimal digits and returns the result as a String column.
+class FunctionHexStr : public IFunction
+{
+public:
+    static constexpr auto name = "hexStr";
+    FunctionHexStr() = default;
+
+    static FunctionPtr create(const Context & /*context*/)
+    {
+        return std::make_shared<FunctionHexStr>();
+    }
+
+    std::string getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 1; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    /// Accepts String and FixedString arguments; the declared result type is always String.
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (!arguments[0]->isStringOrFixedString())
+            throw Exception(
+                fmt::format("Illegal type {} of first argument of function {}", arguments[0]->getName(), getName()),
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        return std::make_shared<DataTypeString>();
+    }
+
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
+    {
+        const ColumnPtr & column = block.getByPosition(arguments[0]).column;
+        auto col_res = ColumnString::create();
+        if (const auto * col_str = checkAndGetColumn<ColumnString>(column.get()))
+        {
+            vector(col_str->getChars(), col_str->getOffsets(), col_res->getChars(), col_res->getOffsets());
+        }
+        else if (const auto * col_fixed = checkAndGetColumn<ColumnFixedString>(column.get()))
+        {
+            // A ColumnString is produced for FixedString input as well, so that the result
+            // column matches the String type declared by getReturnTypeImpl.
+            vectorFixed(col_fixed->getChars(), col_fixed->getN(), col_res->getChars(), col_res->getOffsets());
+        }
+        else
+        {
+            throw Exception(
+                fmt::format("Illegal column {} of argument of function {}", column->getName(), getName()),
+                ErrorCodes::ILLEGAL_COLUMN);
+        }
+        block.getByPosition(result).column = std::move(col_res);
+    }
+
+private:
+    static constexpr UInt8 hexTable[17] = "0123456789ABCDEF";
+
+    /// Writes the two hex digits of `byte` to res_data[pos] and res_data[pos + 1].
+    static void writeHexByte(UInt8 byte, ColumnString::Chars_t & res_data, size_t pos)
+    {
+        res_data[pos] = hexTable[byte >> 4];
+        res_data[pos + 1] = hexTable[byte & 0x0f];
+    }
+
+    /// Hex-encodes the rows of a ColumnString given by (data, offsets) into (res_data, res_offsets).
+    static void vector(const ColumnString::Chars_t & data,
+                       const ColumnString::Offsets & offsets,
+                       ColumnString::Chars_t & res_data,
+                       ColumnString::Offsets & res_offsets)
+    {
+        const size_t rows = offsets.size();
+        // Every string ends with a trailing zero which is not hexed, so subtract `rows` to
+        // leave exactly one trailing zero per output string instead of two.
+        res_data.resize(data.size() * 2 - rows);
+        res_offsets.resize(rows);
+
+        size_t src = 0;
+        size_t dst = 0;
+        for (size_t row = 0; row < rows; ++row)
+        {
+            // offsets[row] - 1 is the position of this string's trailing zero.
+            for (const size_t src_end = offsets[row] - 1; src < src_end; ++src, dst += 2)
+                writeHexByte(data[src], res_data, dst);
+            res_data[dst++] = 0;
+            res_offsets[row] = dst;
+            ++src; // step over the input's trailing zero
+        }
+    }
+
+    /// Hex-encodes the rows of a ColumnFixedString with row width `length` into a ColumnString
+    /// layout: `length * 2` digits plus one trailing zero per row.
+    static void vectorFixed(const ColumnString::Chars_t & data,
+                            size_t length,
+                            ColumnString::Chars_t & res_data,
+                            ColumnString::Offsets & res_offsets)
+    {
+        const size_t rows = length == 0 ? 0 : data.size() / length;
+        res_data.resize(data.size() * 2 + rows);
+        res_offsets.resize(rows);
+
+        size_t src = 0;
+        size_t dst = 0;
+        for (size_t row = 0; row < rows; ++row)
+        {
+            for (const size_t src_end = src + length; src < src_end; ++src, dst += 2)
+                writeHexByte(data[src], res_data, dst);
+            res_data[dst++] = 0;
+            res_offsets[row] = dst;
+        }
+    }
+};
+
+/// Implements `hexInt`: prints each integer, read as a UInt64 via getUInt(), as upper-case
+/// hexadecimal digits without leading zeros.
+class FunctionHexInt : public IFunction
+{
+public:
+    static constexpr auto name = "hexInt";
+    FunctionHexInt() = default;
+
+    static FunctionPtr create(const Context & /*context*/)
+    {
+        return std::make_shared<FunctionHexInt>();
+    }
+
+    std::string getName() const override { return name; }
+    size_t getNumberOfArguments() const override { return 1; }
+    bool useDefaultImplementationForConstants() const override { return true; }
+
+    // NOTE(review): isNumber() presumably also admits non-integer numeric types; executeImpl
+    // only handles the eight integer column types and throws for anything else - confirm.
+    DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
+    {
+        if (!arguments[0]->isNumber())
+            throw Exception(
+                fmt::format("Illegal type {} of first argument of function {}", arguments[0]->getName(), getName()),
+                ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+        return std::make_shared<DataTypeString>();
+    }
+
+    void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
+    {
+        const bool handled = executeHexInt<UInt8>(block, arguments, result)
+            || executeHexInt<UInt16>(block, arguments, result)
+            || executeHexInt<UInt32>(block, arguments, result)
+            || executeHexInt<UInt64>(block, arguments, result)
+            || executeHexInt<Int8>(block, arguments, result)
+            || executeHexInt<Int16>(block, arguments, result)
+            || executeHexInt<Int32>(block, arguments, result)
+            || executeHexInt<Int64>(block, arguments, result);
+        if (!handled)
+            throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
+    }
+
+private:
+    static constexpr UInt8 hexTable[17] = "0123456789ABCDEF";
+    // A UInt64 needs at most 16 hex digits, plus one trailing zero per row.
+    static constexpr size_t max_row_size = 17;
+
+    /// Writes the hex digits of `number` starting at `dst` (a single '0' for zero) and
+    /// returns how many digits were written.
+    static size_t writeHex(UInt64 number, UInt8 * dst)
+    {
+        size_t digits = 1;
+        for (UInt64 rest = number >> 4; rest != 0; rest >>= 4)
+            ++digits;
+        for (size_t pos = digits; pos > 0; number >>= 4)
+            dst[--pos] = hexTable[number & 0x0f];
+        return digits;
+    }
+
+    /// Handles the argument if it is a ColumnVector<IntType>; returns false otherwise.
+    template <typename IntType>
+    bool executeHexInt(
+        Block & block,
+        const ColumnNumbers & arguments,
+        const size_t result) const
+    {
+        const ColumnPtr & column = block.getByPosition(arguments[0]).column;
+        const auto * col = checkAndGetColumn<ColumnVector<IntType>>(column.get());
+        if (col == nullptr)
+            return false;
+
+        const size_t rows = col->size();
+        auto col_res = ColumnString::create();
+        ColumnString::Chars_t & res_chars = col_res->getChars();
+        ColumnString::Offsets & res_offsets = col_res->getOffsets();
+        // Size the buffer for the worst case first, so every write below stays inside
+        // [0, size()); it is shrunk to the bytes actually used afterwards.
+        res_chars.resize(rows * max_row_size);
+        res_offsets.resize(rows);
+
+        size_t write_pos = 0;
+        for (size_t i = 0; i < rows; ++i)
+        {
+            write_pos += writeHex(col->getUInt(i), &res_chars[write_pos]);
+            res_chars[write_pos++] = 0;
+            res_offsets[i] = write_pos;
+        }
+        res_chars.resize(write_pos);
+
+        block.getByPosition(result).column = std::move(col_res);
+        return true;
+    }
+};
+
 // clang-format off
 struct NameEmpty { static constexpr auto name = "empty"; };
 struct NameNotEmpty { static constexpr auto name = "notEmpty"; };
@@ -5204,6 +5406,8 @@ void registerFunctionsString(FunctionFactory & factory)
     factory.registerFunction();
     factory.registerFunction();
     factory.registerFunction();
+    factory.registerFunction<FunctionHexStr>();
+    factory.registerFunction<FunctionHexInt>();
     factory.registerFunction();
 }
 } // namespace DB
diff --git a/dbms/src/Functions/tests/gtest_strings_hexint.cpp b/dbms/src/Functions/tests/gtest_strings_hexint.cpp
new file mode 100644
index 00000000000..b0539d93616
--- /dev/null
+++ b/dbms/src/Functions/tests/gtest_strings_hexint.cpp
@@ -0,0 +1,86 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+#include
+
+#include
+#include
+
+// NOTE(review): the #include targets and the template arguments of createColumn / std::numeric_limits are not visible in this excerpt - confirm against the real file.
+namespace DB
+{
+namespace tests
+{
+class HexIntTest : public DB::tests::FunctionTest // Fixture for the "hexInt" function tests below.
+{
+};
+// Runs "hexInt" over eight input columns, each mixing ordinary values, a NULL, zero and the type limits.
+TEST_F(HexIntTest, hexint_all_unit_Test)
+try
+{
+    const String & func_name = "hexInt";
+    // Limits expected as 8000000000000000 / 7FFFFFFFFFFFFFFF (signed 64-bit range); the negative value prints as 16 hex digits.
+    ASSERT_COLUMN_EQ(
+        createColumn>({"1348B21", std::nullopt, "0", "FFFFFFFFFECB74DF", "8000000000000000", "7FFFFFFFFFFFFFFF"}),
+        executeFunction(
+            func_name,
+            createColumn>({20220705, std::nullopt, 0, -20220705, std::numeric_limits::min(), std::numeric_limits::max()})));
+    // Limits expected as 0 / FFFFFFFFFFFFFFFF (unsigned 64-bit range).
+    ASSERT_COLUMN_EQ(
+        createColumn>({"1348B21", std::nullopt, "0", "0", "FFFFFFFFFFFFFFFF"}),
+        executeFunction(
+            func_name,
+            createColumn>({20220705, std::nullopt, 0, std::numeric_limits::min(), std::numeric_limits::max()})));
+    // Limits expected as FFFFFFFF80000000 / 7FFFFFFF (signed 32-bit range); negatives still print as 16 hex digits.
+    ASSERT_COLUMN_EQ(
+        createColumn>({"13414DA", std::nullopt, "0", "FFFFFFFFFECBEB26", "FFFFFFFF80000000", "7FFFFFFF"}),
+        executeFunction(
+            func_name,
+            createColumn>({20190426, std::nullopt, 0, -20190426, std::numeric_limits::min(), std::numeric_limits::max()})));
+    // Limits expected as 0 / FFFFFFFF (unsigned 32-bit range).
+    ASSERT_COLUMN_EQ(
+        createColumn>({"13414DA", std::nullopt, "0", "0", "FFFFFFFF"}),
+        executeFunction(
+            func_name,
+            createColumn>({20190426, std::nullopt, 0, std::numeric_limits::min(), std::numeric_limits::max()})));
+    // Limits expected as FFFFFFFFFFFF8000 / 7FFF (signed 16-bit range).
+    ASSERT_COLUMN_EQ(
+        createColumn>({"3039", std::nullopt, "0", "FFFFFFFFFFFFCFC7", "FFFFFFFFFFFF8000", "7FFF"}),
+        executeFunction(
+            func_name,
+            createColumn>({12345, std::nullopt, 0, -12345, std::numeric_limits::min(), std::numeric_limits::max()})));
+    // Limits expected as 0 / FFFF (unsigned 16-bit range).
+    ASSERT_COLUMN_EQ(
+        createColumn>({"3039", std::nullopt, "0", "0", "FFFF"}),
+        executeFunction(
+            func_name,
+            createColumn>({12345, std::nullopt, 0, std::numeric_limits::min(), std::numeric_limits::max()})));
+    // Limits expected as FFFFFFFFFFFFFF80 / 7F (signed 8-bit range).
+    ASSERT_COLUMN_EQ(
+        createColumn>({"78", std::nullopt, "0", "FFFFFFFFFFFFFF88", "FFFFFFFFFFFFFF80", "7F"}),
+        executeFunction(
+            func_name,
+            createColumn>({120, std::nullopt, 0, -120, std::numeric_limits::min(), std::numeric_limits::max()})));
+    // Limits expected as 0 / FF (unsigned 8-bit range).
+    ASSERT_COLUMN_EQ(
+        createColumn>({"8F", std::nullopt, "0", "0", "FF"}),
+        executeFunction(
+            func_name,
+            createColumn>({143, std::nullopt, 0, std::numeric_limits::min(), std::numeric_limits::max()})));
+}
+CATCH
+} // namespace tests
+} // namespace DB
\ No newline at end of file
diff --git a/dbms/src/Functions/tests/gtest_strings_hexstr.cpp b/dbms/src/Functions/tests/gtest_strings_hexstr.cpp
new file mode 100644
index 00000000000..6d03ec05ef5
--- /dev/null
+++ b/dbms/src/Functions/tests/gtest_strings_hexstr.cpp
@@ -0,0 +1,58 @@
+// Copyright 2022 PingCAP, Ltd.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+#include
+
+#include
+#include
+
+// NOTE(review): the #include targets and the template arguments of createColumn are not visible in this excerpt - confirm against the real file.
+namespace DB
+{
+namespace tests
+{
+class HexStrTest : public DB::tests::FunctionTest // Fixture for the "hexStr" function tests below.
+{
+};
+// Runs "hexStr" over ASCII, NULL, empty, multi-byte and whitespace inputs and compares against upper-case hex strings.
+TEST_F(HexStrTest, hexstr_all_unit_Test)
+try
+{
+    const String & func_name = "hexStr";
+    // Plain ASCII, a NULL (stays NULL) and the empty string (stays empty).
+    ASSERT_COLUMN_EQ(
+        createColumn>({"7777772E70696E676361702E636F6D", "61626364", std::nullopt, ""}),
+        executeFunction(
+            func_name,
+            createColumn>({"www.pingcap.com", "abcd", std::nullopt, ""})));
+
+    // CJK and emoji: every byte of the multi-byte sequences is hexed individually
+    ASSERT_COLUMN_EQ(
+        createColumn>({"E38195E38289E381ABE585A5", "E6B58BE8AF95E6B58BE8AF95E6B58BE8AF95E6B58BE8AF9561626364E6B58BE8AF95", "F09F8DBB", "F09F8FB4E2808DE298A0EFB88F"}),
+        executeFunction(
+            func_name,
+            createColumn>({"さらに入", "测试测试测试测试abcd测试", "🍻", "🏴‍☠️"})));
+
+    // Whitespace characters: tab, newline and space
+    ASSERT_COLUMN_EQ(
+        createColumn>({"09", "0A", "20"}),
+        executeFunction(
+            func_name,
+            createColumn>({"\t", "\n", " "})));
+}
+CATCH
+} // namespace tests
+} // namespace DB
\ No newline at end of file
diff --git a/tests/fullstack-test/expr/hex_int.test b/tests/fullstack-test/expr/hex_int.test
new file mode 100644
index 00000000000..dc722cb2c7c
--- /dev/null
+++ b/tests/fullstack-test/expr/hex_int.test
@@ -0,0 +1,29 @@
+# Copyright 2022 PingCAP, Ltd.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+ +mysql> drop table if exists test.t; +mysql> create table if not exists test.t(a int, b double, c decimal(5, 2), d bigint); + +mysql> insert into test.t values(12345, 123.45, 123.45, 0x7FFFFFFFFFFFFFFF); +mysql> insert into test.t values(12345, NULL, -123.45, 0x7FFFFFFFFFFFFFFF); +mysql> alter table test.t set tiflash replica 1; +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select hex(a), hex(b), hex(c), hex(d) from test.t; ++--------+--------+------------------+------------------+ +| hex(a) | hex(b) | hex(c) | hex(d) | ++--------+--------+------------------+------------------+ +| 3039 | 7B | 7B | 7FFFFFFFFFFFFFFF | +| 3039 | NULL | FFFFFFFFFFFFFF85 | 7FFFFFFFFFFFFFFF | ++--------+--------+------------------+------------------+ diff --git a/tests/fullstack-test/expr/hex_str.test b/tests/fullstack-test/expr/hex_str.test new file mode 100644 index 00000000000..f5312de9345 --- /dev/null +++ b/tests/fullstack-test/expr/hex_str.test @@ -0,0 +1,43 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +mysql> drop table if exists test.t; +mysql> create table if not exists test.t(a char(100)); + +mysql> insert into test.t values('www.pingcap.com'); +mysql> insert into test.t values('abcd'); +mysql> insert into test.t values('测试测试测试测试abcd测试'); +mysql> insert into test.t values(NULL); +mysql> insert into test.t values('さらに入'); +mysql> insert into test.t values('测试测试测试测试abcd测试'); +mysql> insert into test.t values(''); +mysql> insert into test.t values('🍻'); +mysql> insert into test.t values('🏴‍☠️'); +mysql> insert into test.t values('\t'); +mysql> insert into test.t values('\n'); +mysql> alter table test.t set tiflash replica 1; +func> wait_table test t + +mysql> set tidb_enforce_mpp=1; set tidb_isolation_read_engines='tiflash'; select hex(a) from test.t; +hex(a) +7777772E70696E676361702E636F6D +61626364 +E6B58BE8AF95E6B58BE8AF95E6B58BE8AF95E6B58BE8AF9561626364E6B58BE8AF95 +NULL +E38195E38289E381ABE585A5 +E6B58BE8AF95E6B58BE8AF95E6B58BE8AF95E6B58BE8AF9561626364E6B58BE8AF95 +F09F8DBB +F09F8FB4E2808DE298A0EFB88F +09 +0A