From d967a9a52004900bee57c1856e6264b80b945b3c Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Tue, 28 Jun 2022 02:28:54 -0700 Subject: [PATCH 01/11] [function] push down 'bin' function --- dbms/src/Common/hex.cpp | 36 +++++++ dbms/src/Common/hex.h | 15 +++ dbms/src/Flash/Coprocessor/DAGUtils.cpp | 2 +- dbms/src/Functions/FunctionsString.cpp | 125 ++++++++++++++++++++++++ dbms/src/Functions/tests/gtest_bin.cpp | 64 ++++++++++++ tests/fullstack-test/expr/bin.test | 57 +++++++++++ 6 files changed, 298 insertions(+), 1 deletion(-) create mode 100644 dbms/src/Functions/tests/gtest_bin.cpp create mode 100644 tests/fullstack-test/expr/bin.test diff --git a/dbms/src/Common/hex.cpp b/dbms/src/Common/hex.cpp index e9d047e3207..74a00cc34be 100644 --- a/dbms/src/Common/hex.cpp +++ b/dbms/src/Common/hex.cpp @@ -67,3 +67,39 @@ const char * const hex_char_to_digit_table = "\xff\xff\xff\xff\xff\xff\xff\xff\x "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"; + +const char * const bin_byte_to_char_table = + "0000000000000001000000100000001100000100000001010000011000000111" + "0000100000001001000010100000101100001100000011010000111000001111" + "0001000000010001000100100001001100010100000101010001011000010111" + "0001100000011001000110100001101100011100000111010001111000011111" + "0010000000100001001000100010001100100100001001010010011000100111" + "0010100000101001001010100010101100101100001011010010111000101111" + "0011000000110001001100100011001100110100001101010011011000110111" + "0011100000111001001110100011101100111100001111010011111000111111" + "0100000001000001010000100100001101000100010001010100011001000111" + "0100100001001001010010100100101101001100010011010100111001001111" + "0101000001010001010100100101001101010100010101010101011001010111" + "0101100001011001010110100101101101011100010111010101111001011111" + "0110000001100001011000100110001101100100011001010110011001100111" + "0110100001101001011010100110101101101100011011010110111001101111" + "0111000001110001011100100111001101110100011101010111011001110111" + "0111100001111001011110100111101101111100011111010111111001111111" + "1000000010000001100000101000001110000100100001011000011010000111" + "1000100010001001100010101000101110001100100011011000111010001111" + "1001000010010001100100101001001110010100100101011001011010010111" + "1001100010011001100110101001101110011100100111011001111010011111" + "1010000010100001101000101010001110100100101001011010011010100111" + "1010100010101001101010101010101110101100101011011010111010101111" + "1011000010110001101100101011001110110100101101011011011010110111" + "1011100010111001101110101011101110111100101111011011111010111111" + "1100000011000001110000101100001111000100110001011100011011000111" + "1100100011001001110010101100101111001100110011011100111011001111" + "1101000011010001110100101101001111010100110101011101011011010111" + "1101100011011001110110101101101111011100110111011101111011011111" + "1110000011100001111000101110001111100100111001011110011011100111" + "1110100011101001111010101110101111101100111011011110111011101111" + "1111000011110001111100101111001111110100111101011111011011110111" + "1111100011111001111110101111101111111100111111011111111011111111"; + +const size_t bin_byte_no_zero_prefix_len[256] = {1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}; diff --git a/dbms/src/Common/hex.h b/dbms/src/Common/hex.h index 3ed8aa5dac4..7ec97932107 100644 --- a/dbms/src/Common/hex.h +++ b/dbms/src/Common/hex.h @@ -52,6 +52,21 @@ inline void writeHexByteLowercase(UInt8 byte, void * out) memcpy(out, &hex_byte_to_char_lowercase_table[static_cast(byte) * 2], 2); } +extern const char * const bin_byte_to_char_table; +extern const size_t bin_byte_no_zero_prefix_len[]; + +inline void writeBinByte(UInt8 byte, void * out) +{ + memcpy(out, &bin_byte_to_char_table[static_cast(byte) * 8], 8); +} + +inline size_t writeNoZeroPrefixBinByte(UInt8 byte, void * out) +{ + size_t len = bin_byte_no_zero_prefix_len[static_cast(byte)]; + memcpy(out, &bin_byte_to_char_table[byte * 8 + (8 - len)], len); + return len; +} + /// Produces hex representation of an unsigned int with leading zeros (for checksums) template inline void writeHexUIntImpl(TUInt uint_, char * out, const char * const table) diff --git a/dbms/src/Flash/Coprocessor/DAGUtils.cpp b/dbms/src/Flash/Coprocessor/DAGUtils.cpp index 9ffa29cd14d..121658f14b5 100644 --- a/dbms/src/Flash/Coprocessor/DAGUtils.cpp +++ b/dbms/src/Flash/Coprocessor/DAGUtils.cpp @@ -603,7 +603,7 @@ const std::unordered_map scalar_func_map({ {tipb::ScalarFuncSig::TimestampDiff, "tidbTimestampDiff"}, //{tipb::ScalarFuncSig::BitLength, "cast"}, - //{tipb::ScalarFuncSig::Bin, "cast"}, + {tipb::ScalarFuncSig::Bin, "bin"}, {tipb::ScalarFuncSig::ASCII, "ascii"}, //{tipb::ScalarFuncSig::Char, "cast"}, {tipb::ScalarFuncSig::CharLengthUTF8, "lengthUTF8"}, diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index b9f20e45134..33d2c7c4a77 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -13,6 +13,7 @@ // limitations under the License. #include +#include #include #include #include @@ -4910,6 +4911,129 @@ class FunctionFormatWithLocale : public IFunction } }; +class FunctionBin : public IFunction +{ +public: + static constexpr auto name = "bin"; + static constexpr size_t word_size = 8; + FunctionBin() = default; + + static FunctionPtr create(const Context & /*context*/) + { + return std::make_shared(); + } + + std::string getName() const override { return name; } + size_t getNumberOfArguments() const override { return 1; } + + DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override + { + if (arguments.size() != 1) + throw Exception( + fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()), + ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH); + + auto first_argument = removeNullable(arguments[0]); + if (!first_argument->isInteger()) + throw Exception( + fmt::format("Illegal type {} of first argument of function {}", first_argument->getName(), getName()), + ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + + return std::make_shared(); + } + + template + static void executeOneUIntOrInt(T xx, char *& out, bool skip_leading_zero = true, bool auto_close = true) + { + long long x = static_cast(xx); + bool was_nonzero = false; + bool was_first_nonzero_byte = true; + for (int offset = (sizeof(long long) - 1) * 8; offset >= 0; offset -= 8) + { + UInt8 byte = x >> offset; + /// Skip leading zeros + if (byte == 0 && !was_nonzero && offset && skip_leading_zero) //-V560 + continue; + was_nonzero = true; + if (was_first_nonzero_byte) + { + out += writeNoZeroPrefixBinByte(byte, out); + was_first_nonzero_byte = false; + } + else + { + writeBinByte(byte, out); + out += word_size; + } + } + if (auto_close) + { + *out = '\0'; + ++out; + } + } + + template + bool tryExecuteUIntOrInt(const IColumn * col, ColumnPtr & col_res) const + { + const ColumnVector * col_vec = checkAndGetColumn>(col); + static constexpr size_t MAX_LENGTH = sizeof(Int64) * word_size + 1; /// Including trailing zero byte. + if (col_vec) + { + auto col_str = ColumnString::create(); + ColumnString::Chars_t & out_vec = col_str->getChars(); + ColumnString::Offsets & out_offsets = col_str->getOffsets(); + const typename ColumnVector::Container & in_vec = col_vec->getData(); + size_t size = in_vec.size(); + out_offsets.resize(size); + out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. + size_t pos = 0; + for (size_t i = 0; i < size; ++i) + { + /// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it). + if (pos + MAX_LENGTH > out_vec.size()) + out_vec.resize(out_vec.size() * word_size + MAX_LENGTH); + char * begin = reinterpret_cast(&out_vec[pos]); + char * end = begin; + executeOneUIntOrInt(in_vec[i], end); + pos += end - begin; + out_offsets[i] = pos; + } + out_vec.resize(pos); + col_res = std::move(col_str); + return true; + } + else + { + return false; + } + } + + void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override + { + const IColumn * column = block.getByPosition(arguments[0]).column.get(); + ColumnPtr res_column; + if (tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column)) + { + block.getByPosition(result).column = std::move(res_column); + return; + } + else + { + throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); + } + } + +private: +}; + // clang-format off struct NameEmpty { static constexpr auto name = "empty"; }; struct NameNotEmpty { static constexpr auto name = "notEmpty"; }; @@ -4994,5 +5118,6 @@ void registerFunctionsString(FunctionFactory & factory) factory.registerFunction(); factory.registerFunction(); factory.registerFunction(); + factory.registerFunction(); } } // namespace DB diff --git a/dbms/src/Functions/tests/gtest_bin.cpp b/dbms/src/Functions/tests/gtest_bin.cpp new file mode 100644 index 00000000000..5f97d768186 --- /dev/null +++ b/dbms/src/Functions/tests/gtest_bin.cpp @@ -0,0 +1,64 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +namespace DB::tests +{ +class TestBin : public DB::tests::FunctionTest +{ +}; + +TEST_F(TestBin, Simple) +try +{ + ASSERT_COLUMN_EQ( + createColumn({"1100100"}), + executeFunction("bin", createColumn({100}))); +} +CATCH + +TEST_F(TestBin, Boundary) +try +{ + ASSERT_COLUMN_EQ( + createColumn({"1111111111111111111111111111111111111111111111111111111110000000", "1111111"}), + executeFunction("bin", createColumn({INT8_MIN, INT8_MAX}))); + ASSERT_COLUMN_EQ( + createColumn({"1111111111111111111111111111111111111111111111111000000000000000", "111111111111111"}), + executeFunction("bin", createColumn({INT16_MIN, INT16_MAX}))); + ASSERT_COLUMN_EQ( + createColumn({"1111111111111111111111111111111110000000000000000000000000000000", "1111111111111111111111111111111"}), + executeFunction("bin", createColumn({INT32_MIN, INT32_MAX}))); + ASSERT_COLUMN_EQ( + createColumn({"1000000000000000000000000000000000000000000000000000000000000000", + "111111111111111111111111111111111111111111111111111111111111111"}), + executeFunction("bin", createColumn({INT64_MIN, INT64_MAX}))); + ASSERT_COLUMN_EQ( + createColumn({"0", "11111111"}), + executeFunction("bin", createColumn({0, 255}))); + ASSERT_COLUMN_EQ( + createColumn({"0", "1111111111111111"}), + executeFunction("bin", createColumn({0, 65535}))); + ASSERT_COLUMN_EQ( + createColumn({"0", "11111111111111111111111111111111"}), + executeFunction("bin", createColumn({0, 4294967295}))); + ASSERT_COLUMN_EQ( + createColumn({"0", "1111111111111111111111111111111111111111111111111111111111111111"}), + executeFunction("bin", createColumn({0, ULLONG_MAX}))); +} +CATCH + +} // namespace DB::tests + diff --git a/tests/fullstack-test/expr/bin.test b/tests/fullstack-test/expr/bin.test new file mode 100644 index 00000000000..bdb1a861605 --- /dev/null +++ b/tests/fullstack-test/expr/bin.test @@ -0,0 +1,57 @@ +# Copyright 2022 PingCAP, Ltd. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +mysql> drop table if exists test.t1; +mysql> create table test.t1(c1 bigint); +mysql> insert into test.t1 values(0); +mysql> insert into test.t1 values(1); +mysql> insert into test.t1 values(44); +mysql> insert into test.t1 values(100); +mysql> insert into test.t1 values(-9223372036854775808); +mysql> insert into test.t1 values(9223372036854775807); +mysql> alter table test.t1 set tiflash replica 1; +mysql> drop table if exists test.t2; +mysql> create table test.t2(c1 bigint unsigned); +mysql> insert into test.t2 values(0); +mysql> insert into test.t2 values(1); +mysql> insert into test.t2 values(44); +mysql> insert into test.t2 values(100); +mysql> insert into test.t2 values(18446744073709551615); +mysql> alter table test.t2 set tiflash replica 1; + +func> wait_table test t1 +func> wait_table test t2 + +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select bin(c1) from test.t1; ++------------------------------------------------------------------+ +| bin(c1) | ++------------------------------------------------------------------+ +| 0 | +| 1 | +| 101100 | +| 1100100 | +| 1000000000000000000000000000000000000000000000000000000000000000 | +| 111111111111111111111111111111111111111111111111111111111111111 | ++------------------------------------------------------------------+ +mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select bin(c1) from test.t2; ++------------------------------------------------------------------+ +| bin(c1) | ++------------------------------------------------------------------+ +| 0 | +| 1 | +| 101100 | +| 1100100 | +| 1111111111111111111111111111111111111111111111111111111111111111 | ++------------------------------------------------------------------+ + From a77c923ae8f456553df1c43bd552e489bfaf09ea Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Tue, 28 Jun 2022 04:21:33 -0700 Subject: [PATCH 02/11] format source --- dbms/src/Common/hex.cpp | 65 +++++++++++++------------- dbms/src/Functions/FunctionsString.cpp | 53 +++++++++------------ dbms/src/Functions/tests/gtest_bin.cpp | 5 +- 3 files changed, 57 insertions(+), 66 deletions(-) diff --git a/dbms/src/Common/hex.cpp b/dbms/src/Common/hex.cpp index 74a00cc34be..45e31b5a549 100644 --- a/dbms/src/Common/hex.cpp +++ b/dbms/src/Common/hex.cpp @@ -68,38 +68,37 @@ const char * const hex_char_to_digit_table = "\xff\xff\xff\xff\xff\xff\xff\xff\x "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff" "\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"; -const char * const bin_byte_to_char_table = - "0000000000000001000000100000001100000100000001010000011000000111" - "0000100000001001000010100000101100001100000011010000111000001111" - "0001000000010001000100100001001100010100000101010001011000010111" - "0001100000011001000110100001101100011100000111010001111000011111" - "0010000000100001001000100010001100100100001001010010011000100111" - "0010100000101001001010100010101100101100001011010010111000101111" - "0011000000110001001100100011001100110100001101010011011000110111" - "0011100000111001001110100011101100111100001111010011111000111111" - "0100000001000001010000100100001101000100010001010100011001000111" - "0100100001001001010010100100101101001100010011010100111001001111" - "0101000001010001010100100101001101010100010101010101011001010111" - "0101100001011001010110100101101101011100010111010101111001011111" - "0110000001100001011000100110001101100100011001010110011001100111" - "0110100001101001011010100110101101101100011011010110111001101111" - "0111000001110001011100100111001101110100011101010111011001110111" - "0111100001111001011110100111101101111100011111010111111001111111" - "1000000010000001100000101000001110000100100001011000011010000111" - "1000100010001001100010101000101110001100100011011000111010001111" - "1001000010010001100100101001001110010100100101011001011010010111" - "1001100010011001100110101001101110011100100111011001111010011111" - "1010000010100001101000101010001110100100101001011010011010100111" - "1010100010101001101010101010101110101100101011011010111010101111" - "1011000010110001101100101011001110110100101101011011011010110111" - "1011100010111001101110101011101110111100101111011011111010111111" - "1100000011000001110000101100001111000100110001011100011011000111" - "1100100011001001110010101100101111001100110011011100111011001111" - "1101000011010001110100101101001111010100110101011101011011010111" - "1101100011011001110110101101101111011100110111011101111011011111" - "1110000011100001111000101110001111100100111001011110011011100111" - "1110100011101001111010101110101111101100111011011110111011101111" - "1111000011110001111100101111001111110100111101011111011011110111" - "1111100011111001111110101111101111111100111111011111111011111111"; +const char * const bin_byte_to_char_table = "0000000000000001000000100000001100000100000001010000011000000111" + "0000100000001001000010100000101100001100000011010000111000001111" + "0001000000010001000100100001001100010100000101010001011000010111" + "0001100000011001000110100001101100011100000111010001111000011111" + "0010000000100001001000100010001100100100001001010010011000100111" + "0010100000101001001010100010101100101100001011010010111000101111" + "0011000000110001001100100011001100110100001101010011011000110111" + "0011100000111001001110100011101100111100001111010011111000111111" + "0100000001000001010000100100001101000100010001010100011001000111" + "0100100001001001010010100100101101001100010011010100111001001111" + "0101000001010001010100100101001101010100010101010101011001010111" + "0101100001011001010110100101101101011100010111010101111001011111" + "0110000001100001011000100110001101100100011001010110011001100111" + "0110100001101001011010100110101101101100011011010110111001101111" + "0111000001110001011100100111001101110100011101010111011001110111" + "0111100001111001011110100111101101111100011111010111111001111111" + "1000000010000001100000101000001110000100100001011000011010000111" + "1000100010001001100010101000101110001100100011011000111010001111" + "1001000010010001100100101001001110010100100101011001011010010111" + "1001100010011001100110101001101110011100100111011001111010011111" + "1010000010100001101000101010001110100100101001011010011010100111" + "1010100010101001101010101010101110101100101011011010111010101111" + "1011000010110001101100101011001110110100101101011011011010110111" + "1011100010111001101110101011101110111100101111011011111010111111" + "1100000011000001110000101100001111000100110001011100011011000111" + "1100100011001001110010101100101111001100110011011100111011001111" + "1101000011010001110100101101001111010100110101011101011011010111" + "1101100011011001110110101101101111011100110111011101111011011111" + "1110000011100001111000101110001111100100111001011110011011100111" + "1110100011101001111010101110101111101100111011011110111011101111" + "1111000011110001111100101111001111110100111101011111011011110111" + "1111100011111001111110101111101111111100111111011111111011111111"; const size_t bin_byte_no_zero_prefix_len[256] = {1, 1, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 4, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8}; diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index 33d2c7c4a77..099715e20bd 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -13,10 +13,10 @@ // limitations under the License. #include -#include #include #include #include +#include #include #include #include @@ -4945,9 +4945,9 @@ class FunctionBin : public IFunction template static void executeOneUIntOrInt(T xx, char *& out, bool skip_leading_zero = true, bool auto_close = true) { - long long x = static_cast(xx); - bool was_nonzero = false; - bool was_first_nonzero_byte = true; + long long x = static_cast(xx); + bool was_nonzero = false; + bool was_first_nonzero_byte = true; for (int offset = (sizeof(long long) - 1) * 8; offset >= 0; offset -= 8) { UInt8 byte = x >> offset; @@ -4955,16 +4955,16 @@ class FunctionBin : public IFunction if (byte == 0 && !was_nonzero && offset && skip_leading_zero) //-V560 continue; was_nonzero = true; - if (was_first_nonzero_byte) - { - out += writeNoZeroPrefixBinByte(byte, out); - was_first_nonzero_byte = false; - } - else - { + if (was_first_nonzero_byte) + { + out += writeNoZeroPrefixBinByte(byte, out); + was_first_nonzero_byte = false; + } + else + { writeBinByte(byte, out); out += word_size; - } + } } if (auto_close) { @@ -4977,7 +4977,7 @@ class FunctionBin : public IFunction bool tryExecuteUIntOrInt(const IColumn * col, ColumnPtr & col_res) const { const ColumnVector * col_vec = checkAndGetColumn>(col); - static constexpr size_t MAX_LENGTH = sizeof(Int64) * word_size + 1; /// Including trailing zero byte. + static constexpr size_t MAX_LENGTH = sizeof(Int64) * word_size + 1; /// Including trailing zero byte. if (col_vec) { auto col_str = ColumnString::create(); @@ -4986,7 +4986,7 @@ class FunctionBin : public IFunction const typename ColumnVector::Container & in_vec = col_vec->getData(); size_t size = in_vec.size(); out_offsets.resize(size); - out_vec.resize(size * (word_size+1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. + out_vec.resize(size * (word_size + 1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte. size_t pos = 0; for (size_t i = 0; i < size; ++i) { @@ -4996,7 +4996,7 @@ class FunctionBin : public IFunction char * begin = reinterpret_cast(&out_vec[pos]); char * end = begin; executeOneUIntOrInt(in_vec[i], end); - pos += end - begin; + pos += end - begin; out_offsets[i] = pos; } out_vec.resize(pos); @@ -5013,22 +5013,15 @@ class FunctionBin : public IFunction { const IColumn * column = block.getByPosition(arguments[0]).column.get(); ColumnPtr res_column; - if (tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column)) - { - block.getByPosition(result).column = std::move(res_column); - return; - } - else - { + if (tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column)) + { + block.getByPosition(result).column = std::move(res_column); + return; + } + else + { throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT); - } + } } private: diff --git a/dbms/src/Functions/tests/gtest_bin.cpp b/dbms/src/Functions/tests/gtest_bin.cpp index 5f97d768186..72381e2b375 100644 --- a/dbms/src/Functions/tests/gtest_bin.cpp +++ b/dbms/src/Functions/tests/gtest_bin.cpp @@ -42,8 +42,8 @@ try createColumn({"1111111111111111111111111111111110000000000000000000000000000000", "1111111111111111111111111111111"}), executeFunction("bin", createColumn({INT32_MIN, INT32_MAX}))); ASSERT_COLUMN_EQ( - createColumn({"1000000000000000000000000000000000000000000000000000000000000000", - "111111111111111111111111111111111111111111111111111111111111111"}), + createColumn({"1000000000000000000000000000000000000000000000000000000000000000", + "111111111111111111111111111111111111111111111111111111111111111"}), executeFunction("bin", createColumn({INT64_MIN, INT64_MAX}))); ASSERT_COLUMN_EQ( createColumn({"0", "11111111"}), @@ -61,4 +61,3 @@ try CATCH } // namespace DB::tests - From 8d24f8d1ca51b30c0d0a2cae0a1e1483927e48de Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Tue, 28 Jun 2022 04:39:26 -0700 Subject: [PATCH 03/11] format --- dbms/src/Functions/FunctionsString.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index 099715e20bd..489f932cdb4 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -4945,10 +4945,10 @@ class FunctionBin : public IFunction template static void executeOneUIntOrInt(T xx, char *& out, bool skip_leading_zero = true, bool auto_close = true) { - long long x = static_cast(xx); + auto x = static_cast(xx); bool was_nonzero = false; bool was_first_nonzero_byte = true; - for (int offset = (sizeof(long long) - 1) * 8; offset >= 0; offset -= 8) + for (int offset = (sizeof(int64) - 1) * 8; offset >= 0; offset -= 8) { UInt8 byte = x >> offset; /// Skip leading zeros @@ -4976,7 +4976,7 @@ class FunctionBin : public IFunction template bool tryExecuteUIntOrInt(const IColumn * col, ColumnPtr & col_res) const { - const ColumnVector * col_vec = checkAndGetColumn>(col); + auto ColumnVector * col_vec = checkAndGetColumn>(col); static constexpr size_t MAX_LENGTH = sizeof(Int64) * word_size + 1; /// Including trailing zero byte. if (col_vec) { From 598d30b0c155b0ea7783754b7ad816117a1cd80a Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Tue, 28 Jun 2022 05:49:12 -0700 Subject: [PATCH 04/11] format --- dbms/src/Functions/FunctionsString.cpp | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index 489f932cdb4..3d335da059c 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -4945,10 +4945,10 @@ class FunctionBin : public IFunction template static void executeOneUIntOrInt(T xx, char *& out, bool skip_leading_zero = true, bool auto_close = true) { - auto x = static_cast(xx); + auto x = static_cast(xx); bool was_nonzero = false; bool was_first_nonzero_byte = true; - for (int offset = (sizeof(int64) - 1) * 8; offset >= 0; offset -= 8) + for (int offset = (sizeof(Int64) - 1) * 8; offset >= 0; offset -= 8) { UInt8 byte = x >> offset; /// Skip leading zeros @@ -4976,7 +4976,7 @@ class FunctionBin : public IFunction template bool tryExecuteUIntOrInt(const IColumn * col, ColumnPtr & col_res) const { - auto ColumnVector * col_vec = checkAndGetColumn>(col); + auto* col_vec = checkAndGetColumn>(col); static constexpr size_t MAX_LENGTH = sizeof(Int64) * word_size + 1; /// Including trailing zero byte. if (col_vec) { From 969da9ece2bbf602ca38d67a3ecbb79133536e75 Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Tue, 28 Jun 2022 18:24:11 -0700 Subject: [PATCH 05/11] format --- dbms/src/Functions/FunctionsString.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index 3d335da059c..8c03d2eec08 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -4976,7 +4976,7 @@ class FunctionBin : public IFunction template bool tryExecuteUIntOrInt(const IColumn * col, ColumnPtr & col_res) const { - auto* col_vec = checkAndGetColumn>(col); + auto * col_vec = checkAndGetColumn>(col); static constexpr size_t MAX_LENGTH = sizeof(Int64) * word_size + 1; /// Including trailing zero byte. if (col_vec) { From ceccf3d763ee9056812c74c88925201be35e58fe Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Tue, 28 Jun 2022 21:00:42 -0700 Subject: [PATCH 06/11] fix bugprone-signed-char-misuse check --- dbms/src/Functions/FunctionsString.cpp | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index 8c03d2eec08..b6bd5bfa0e1 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -4945,7 +4945,18 @@ class FunctionBin : public IFunction template static void executeOneUIntOrInt(T xx, char *& out, bool skip_leading_zero = true, bool auto_close = true) { - auto x = static_cast(xx); + Int64 x = 0; + if (typeid(T) == typeid(signed char)) + { + auto t_x = static_cast(xx); + x = static_cast(t_x); + if (xx < char(0)) + x |= (UINT64_MAX & (1 << 8)); + } + else + { + x = static_cast(xx); + } bool was_nonzero = false; bool was_first_nonzero_byte = true; for (int offset = (sizeof(Int64) - 1) * 8; offset >= 0; offset -= 8) From 6b82f967fe1513b7f7cf682bb6a86ac98beec97b Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Tue, 28 Jun 2022 23:59:26 -0700 Subject: [PATCH 07/11] fix bugprone-signed-char-misuse --- dbms/src/Functions/FunctionsString.cpp | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index b6bd5bfa0e1..f09b9219813 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -4946,17 +4946,18 @@ class FunctionBin : public IFunction static void executeOneUIntOrInt(T xx, char *& out, bool skip_leading_zero = true, bool auto_close = true) { Int64 x = 0; - if (typeid(T) == typeid(signed char)) + if (typeid(T) == typeid(Int8)) { - auto t_x = static_cast(xx); + auto t_x = static_cast(xx); x = static_cast(t_x); - if (xx < char(0)) - x |= (UINT64_MAX & (1 << 8)); + if (xx < Int8(0)) + x |= (UINT64_MAX << 8); } else { x = static_cast(xx); } + //auto x = static_cast(xx); bool was_nonzero = false; bool was_first_nonzero_byte = true; for (int offset = (sizeof(Int64) - 1) * 8; offset >= 0; offset -= 8) From 1211b20307b41a1ab70442fc2fdfd17c9fb7cd61 Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Wed, 29 Jun 2022 00:48:29 -0700 Subject: [PATCH 08/11] switch off bugprone-signed-char-misuse --- dbms/src/Functions/FunctionsString.cpp | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index f09b9219813..bde8265a3b7 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -4945,19 +4945,7 @@ class FunctionBin : public IFunction template static void executeOneUIntOrInt(T xx, char *& out, bool skip_leading_zero = true, bool auto_close = true) { - Int64 x = 0; - if (typeid(T) == typeid(Int8)) - { - auto t_x = static_cast(xx); - x = static_cast(t_x); - if (xx < Int8(0)) - x |= (UINT64_MAX << 8); - } - else - { - x = static_cast(xx); - } - //auto x = static_cast(xx); + auto x = static_cast(xx); // NOLINT bool was_nonzero = false; bool was_first_nonzero_byte = true; for (int offset = (sizeof(Int64) - 1) * 8; offset >= 0; offset -= 8) From b3dcadaebef4dd62e93d764fbd6b1c3ce3232f3a Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Wed, 29 Jun 2022 00:59:35 -0700 Subject: [PATCH 09/11] format --- dbms/src/Functions/FunctionsString.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index bde8265a3b7..6251f59f73e 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -4945,7 +4945,7 @@ class FunctionBin : public IFunction template static void executeOneUIntOrInt(T xx, char *& out, bool skip_leading_zero = true, bool auto_close = true) { - auto x = static_cast(xx); // NOLINT + auto x = static_cast(xx); // NOLINT bool was_nonzero = false; bool was_first_nonzero_byte = true; for (int offset = (sizeof(Int64) - 1) * 8; offset >= 0; offset -= 8) From d3421d191a66abe9d5a51a02718f2dc0e7fbfc02 Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Sat, 9 Jul 2022 01:00:46 -0700 Subject: [PATCH 10/11] style: remove useless params and format codes --- dbms/src/Functions/FunctionsString.cpp | 22 +++++++++++++--------- dbms/src/Functions/tests/gtest_bin.cpp | 16 ++++++++-------- tests/fullstack-test/expr/bin.test | 4 ++++ 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index 691baba58cf..8c2d25b7860 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -4943,16 +4943,16 @@ class FunctionBin : public IFunction } template - static void executeOneUIntOrInt(T xx, char *& out, bool skip_leading_zero = true, bool auto_close = true) + static void executeOneUIntOrInt(T data, char *& out) { - auto x = static_cast(xx); // NOLINT + auto x = static_cast(data); // NOLINT bool was_nonzero = false; bool was_first_nonzero_byte = true; for (int offset = (sizeof(Int64) - 1) * 8; offset >= 0; offset -= 8) { UInt8 byte = x >> offset; /// Skip leading zeros - if (byte == 0 && !was_nonzero && offset && skip_leading_zero) //-V560 + if (byte == 0 && !was_nonzero && offset) continue; was_nonzero = true; if (was_first_nonzero_byte) @@ -4966,11 +4966,8 @@ class FunctionBin : public IFunction out += word_size; } } - if (auto_close) - { - *out = '\0'; - ++out; - } + *out = '\0'; + ++out; } template @@ -5013,7 +5010,14 @@ class FunctionBin : public IFunction { const IColumn * column = block.getByPosition(arguments[0]).column.get(); ColumnPtr res_column; - if (tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column)) + if (tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column) || + tryExecuteUIntOrInt(column, res_column)) { block.getByPosition(result).column = std::move(res_column); return; diff --git a/dbms/src/Functions/tests/gtest_bin.cpp b/dbms/src/Functions/tests/gtest_bin.cpp index 72381e2b375..5d79673a6c1 100644 --- a/dbms/src/Functions/tests/gtest_bin.cpp +++ b/dbms/src/Functions/tests/gtest_bin.cpp @@ -33,18 +33,18 @@ TEST_F(TestBin, Boundary) try { ASSERT_COLUMN_EQ( - createColumn({"1111111111111111111111111111111111111111111111111111111110000000", "1111111"}), - executeFunction("bin", createColumn({INT8_MIN, INT8_MAX}))); + createColumn({"0", "1111111111111111111111111111111111111111111111111111111110000000", "1111111"}), + executeFunction("bin", createColumn({0, INT8_MIN, INT8_MAX}))); ASSERT_COLUMN_EQ( - createColumn({"1111111111111111111111111111111111111111111111111000000000000000", "111111111111111"}), - executeFunction("bin", createColumn({INT16_MIN, INT16_MAX}))); + createColumn({"0", "1111111111111111111111111111111111111111111111111000000000000000", "111111111111111"}), + executeFunction("bin", createColumn({0, INT16_MIN, INT16_MAX}))); ASSERT_COLUMN_EQ( - createColumn({"1111111111111111111111111111111110000000000000000000000000000000", "1111111111111111111111111111111"}), - executeFunction("bin", createColumn({INT32_MIN, INT32_MAX}))); + createColumn({"0", "1111111111111111111111111111111110000000000000000000000000000000", "1111111111111111111111111111111"}), + executeFunction("bin", createColumn({0, INT32_MIN, INT32_MAX}))); ASSERT_COLUMN_EQ( - createColumn({"1000000000000000000000000000000000000000000000000000000000000000", + createColumn({"0", "1000000000000000000000000000000000000000000000000000000000000000", "111111111111111111111111111111111111111111111111111111111111111"}), - executeFunction("bin", createColumn({INT64_MIN, INT64_MAX}))); + executeFunction("bin", createColumn({0, INT64_MIN, INT64_MAX}))); ASSERT_COLUMN_EQ( createColumn({"0", "11111111"}), executeFunction("bin", createColumn({0, 255}))); diff --git a/tests/fullstack-test/expr/bin.test b/tests/fullstack-test/expr/bin.test index bdb1a861605..f789b2c1210 100644 --- a/tests/fullstack-test/expr/bin.test +++ b/tests/fullstack-test/expr/bin.test @@ -20,6 +20,7 @@ mysql> insert into test.t1 values(44); mysql> insert into test.t1 values(100); mysql> insert into test.t1 values(-9223372036854775808); mysql> insert into test.t1 values(9223372036854775807); +mysql> insert into test.t1 values(NULL); mysql> alter table test.t1 set tiflash replica 1; mysql> drop table if exists test.t2; mysql> create table test.t2(c1 bigint unsigned); @@ -28,6 +29,7 @@ mysql> insert into test.t2 values(1); mysql> insert into test.t2 values(44); mysql> insert into test.t2 values(100); mysql> insert into test.t2 values(18446744073709551615); +mysql> insert into test.t2 values(NULL); mysql> alter table test.t2 set tiflash replica 1; func> wait_table test t1 @@ -43,6 +45,7 @@ mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; | 1100100 | | 1000000000000000000000000000000000000000000000000000000000000000 | | 111111111111111111111111111111111111111111111111111111111111111 | +| NULL | +------------------------------------------------------------------+ mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select bin(c1) from test.t2; +------------------------------------------------------------------+ @@ -53,5 +56,6 @@ mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; | 101100 | | 1100100 | | 1111111111111111111111111111111111111111111111111111111111111111 | +| NULL | +------------------------------------------------------------------+ From d8e3734b03f8c2a7f541fedabfb5922a8432e280 Mon Sep 17 00:00:00 2001 From: dongjunduo Date: Sat, 9 Jul 2022 01:44:59 -0700 Subject: [PATCH 11/11] style: format codes --- dbms/src/Functions/FunctionsString.cpp | 9 +-------- dbms/src/Functions/tests/gtest_bin.cpp | 3 +-- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/dbms/src/Functions/FunctionsString.cpp b/dbms/src/Functions/FunctionsString.cpp index 8c2d25b7860..0dcb618b11d 100644 --- a/dbms/src/Functions/FunctionsString.cpp +++ b/dbms/src/Functions/FunctionsString.cpp @@ -5010,14 +5010,7 @@ class FunctionBin : public IFunction { const IColumn * column = block.getByPosition(arguments[0]).column.get(); ColumnPtr res_column; - if (tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column) || - tryExecuteUIntOrInt(column, res_column)) + if (tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column) || tryExecuteUIntOrInt(column, res_column)) { block.getByPosition(result).column = std::move(res_column); return; diff --git a/dbms/src/Functions/tests/gtest_bin.cpp b/dbms/src/Functions/tests/gtest_bin.cpp index 5d79673a6c1..76b6b6ed473 100644 --- a/dbms/src/Functions/tests/gtest_bin.cpp +++ b/dbms/src/Functions/tests/gtest_bin.cpp @@ -42,8 +42,7 @@ try createColumn({"0", "1111111111111111111111111111111110000000000000000000000000000000", "1111111111111111111111111111111"}), executeFunction("bin", createColumn({0, INT32_MIN, INT32_MAX}))); ASSERT_COLUMN_EQ( - createColumn({"0", "1000000000000000000000000000000000000000000000000000000000000000", - "111111111111111111111111111111111111111111111111111111111111111"}), + createColumn({"0", "1000000000000000000000000000000000000000000000000000000000000000", "111111111111111111111111111111111111111111111111111111111111111"}), executeFunction("bin", createColumn({0, INT64_MIN, INT64_MAX}))); ASSERT_COLUMN_EQ( createColumn({"0", "11111111"}),