Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: support 'bin' function push down #5246

Merged
merged 17 commits into from
Jul 25, 2022
Merged
Show file tree
Hide file tree
Changes from 11 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
35 changes: 35 additions & 0 deletions dbms/src/Common/hex.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,38 @@ const char * const hex_char_to_digit_table = "\xff\xff\xff\xff\xff\xff\xff\xff\x
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff"
"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff";

/// Lookup table for binary formatting: the 8 characters starting at offset `b * 8` are the
/// zero-padded binary digits of byte value `b`, most significant bit first.
/// Each byte value is spelled as its own literal; adjacent string literals are concatenated
/// by the compiler, so the result is a single 2048-character string.
const char * const bin_byte_to_char_table =
    "00000000" "00000001" "00000010" "00000011" "00000100" "00000101" "00000110" "00000111" // 0x00 - 0x07
    "00001000" "00001001" "00001010" "00001011" "00001100" "00001101" "00001110" "00001111" // 0x08 - 0x0F
    "00010000" "00010001" "00010010" "00010011" "00010100" "00010101" "00010110" "00010111" // 0x10 - 0x17
    "00011000" "00011001" "00011010" "00011011" "00011100" "00011101" "00011110" "00011111" // 0x18 - 0x1F
    "00100000" "00100001" "00100010" "00100011" "00100100" "00100101" "00100110" "00100111" // 0x20 - 0x27
    "00101000" "00101001" "00101010" "00101011" "00101100" "00101101" "00101110" "00101111" // 0x28 - 0x2F
    "00110000" "00110001" "00110010" "00110011" "00110100" "00110101" "00110110" "00110111" // 0x30 - 0x37
    "00111000" "00111001" "00111010" "00111011" "00111100" "00111101" "00111110" "00111111" // 0x38 - 0x3F
    "01000000" "01000001" "01000010" "01000011" "01000100" "01000101" "01000110" "01000111" // 0x40 - 0x47
    "01001000" "01001001" "01001010" "01001011" "01001100" "01001101" "01001110" "01001111" // 0x48 - 0x4F
    "01010000" "01010001" "01010010" "01010011" "01010100" "01010101" "01010110" "01010111" // 0x50 - 0x57
    "01011000" "01011001" "01011010" "01011011" "01011100" "01011101" "01011110" "01011111" // 0x58 - 0x5F
    "01100000" "01100001" "01100010" "01100011" "01100100" "01100101" "01100110" "01100111" // 0x60 - 0x67
    "01101000" "01101001" "01101010" "01101011" "01101100" "01101101" "01101110" "01101111" // 0x68 - 0x6F
    "01110000" "01110001" "01110010" "01110011" "01110100" "01110101" "01110110" "01110111" // 0x70 - 0x77
    "01111000" "01111001" "01111010" "01111011" "01111100" "01111101" "01111110" "01111111" // 0x78 - 0x7F
    "10000000" "10000001" "10000010" "10000011" "10000100" "10000101" "10000110" "10000111" // 0x80 - 0x87
    "10001000" "10001001" "10001010" "10001011" "10001100" "10001101" "10001110" "10001111" // 0x88 - 0x8F
    "10010000" "10010001" "10010010" "10010011" "10010100" "10010101" "10010110" "10010111" // 0x90 - 0x97
    "10011000" "10011001" "10011010" "10011011" "10011100" "10011101" "10011110" "10011111" // 0x98 - 0x9F
    "10100000" "10100001" "10100010" "10100011" "10100100" "10100101" "10100110" "10100111" // 0xA0 - 0xA7
    "10101000" "10101001" "10101010" "10101011" "10101100" "10101101" "10101110" "10101111" // 0xA8 - 0xAF
    "10110000" "10110001" "10110010" "10110011" "10110100" "10110101" "10110110" "10110111" // 0xB0 - 0xB7
    "10111000" "10111001" "10111010" "10111011" "10111100" "10111101" "10111110" "10111111" // 0xB8 - 0xBF
    "11000000" "11000001" "11000010" "11000011" "11000100" "11000101" "11000110" "11000111" // 0xC0 - 0xC7
    "11001000" "11001001" "11001010" "11001011" "11001100" "11001101" "11001110" "11001111" // 0xC8 - 0xCF
    "11010000" "11010001" "11010010" "11010011" "11010100" "11010101" "11010110" "11010111" // 0xD0 - 0xD7
    "11011000" "11011001" "11011010" "11011011" "11011100" "11011101" "11011110" "11011111" // 0xD8 - 0xDF
    "11100000" "11100001" "11100010" "11100011" "11100100" "11100101" "11100110" "11100111" // 0xE0 - 0xE7
    "11101000" "11101001" "11101010" "11101011" "11101100" "11101101" "11101110" "11101111" // 0xE8 - 0xEF
    "11110000" "11110001" "11110010" "11110011" "11110100" "11110101" "11110110" "11110111" // 0xF0 - 0xF7
    "11111000" "11111001" "11111010" "11111011" "11111100" "11111101" "11111110" "11111111"; // 0xF8 - 0xFF

/// For each byte value, the number of binary digits that remain once leading zeros are
/// stripped from its 8-digit form. Zero keeps a single digit ("0").
const size_t bin_byte_no_zero_prefix_len[256] = {
    // 0x00 - 0x01: one digit
    1, 1,
    // 0x02 - 0x03: two digits
    2, 2,
    // 0x04 - 0x07: three digits
    3, 3, 3, 3,
    // 0x08 - 0x0F: four digits
    4, 4, 4, 4, 4, 4, 4, 4,
    // 0x10 - 0x1F: five digits
    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5,
    // 0x20 - 0x3F: six digits
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6,
    // 0x40 - 0x7F: seven digits
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7, 7,
    // 0x80 - 0xFF: eight digits
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
    8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8};
15 changes: 15 additions & 0 deletions dbms/src/Common/hex.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,21 @@ inline void writeHexByteLowercase(UInt8 byte, void * out)
memcpy(out, &hex_byte_to_char_lowercase_table[static_cast<size_t>(byte) * 2], 2);
}

/// Binary-digit lookup table (defined in hex.cpp): 8 characters of '0'/'1' per byte value,
/// indexed by `byte * 8`.
extern const char * const bin_byte_to_char_table;
/// Per-byte count of binary digits left after dropping leading zeros (defined in hex.cpp).
extern const size_t bin_byte_no_zero_prefix_len[];

/// Copies the full 8-character binary form of `byte` into `out`.
/// Exactly 8 bytes are written and no terminating zero is appended.
inline void writeBinByte(UInt8 byte, void * out)
{
    const char * digits = &bin_byte_to_char_table[static_cast<size_t>(byte) * 8];
    memcpy(out, digits, 8);
}

/// Copies the binary form of `byte` without its leading zeros into `out` and returns the
/// number of characters written, as given by `bin_byte_no_zero_prefix_len`.
/// No terminating zero is appended.
inline size_t writeNoZeroPrefixBinByte(UInt8 byte, void * out)
{
    const size_t digit_count = bin_byte_no_zero_prefix_len[static_cast<size_t>(byte)];
    /// Skip the leading zeros inside this byte's 8-character table entry.
    const char * first_digit = &bin_byte_to_char_table[byte * 8 + (8 - digit_count)];
    memcpy(out, first_digit, digit_count);
    return digit_count;
}

/// Produces hex representation of an unsigned int with leading zeros (for checksums)
template <typename TUInt>
inline void writeHexUIntImpl(TUInt uint_, char * out, const char * const table)
Expand Down
2 changes: 1 addition & 1 deletion dbms/src/Flash/Coprocessor/DAGUtils.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -603,7 +603,7 @@ const std::unordered_map<tipb::ScalarFuncSig, String> scalar_func_map({
{tipb::ScalarFuncSig::TimestampDiff, "tidbTimestampDiff"},

//{tipb::ScalarFuncSig::BitLength, "cast"},
//{tipb::ScalarFuncSig::Bin, "cast"},
{tipb::ScalarFuncSig::Bin, "bin"},
{tipb::ScalarFuncSig::ASCII, "ascii"},
//{tipb::ScalarFuncSig::Char, "cast"},
{tipb::ScalarFuncSig::CharLengthUTF8, "lengthUTF8"},
Expand Down
118 changes: 118 additions & 0 deletions dbms/src/Functions/FunctionsString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include <Common/TargetSpecific.h>
#include <Common/UTF8Helpers.h>
#include <Common/Volnitsky.h>
#include <Common/hex.h>
#include <Core/AccurateComparison.h>
#include <DataTypes/DataTypeArray.h>
#include <Flash/Coprocessor/DAGContext.h>
Expand Down Expand Up @@ -4910,6 +4911,122 @@ class FunctionFormatWithLocale : public IFunction
}
};

class FunctionBin : public IFunction
{
public:
static constexpr auto name = "bin";
static constexpr size_t word_size = 8;
FunctionBin() = default;

static FunctionPtr create(const Context & /*context*/)
{
return std::make_shared<FunctionBin>();
}

std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 1)
throw Exception(
fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);

auto first_argument = removeNullable(arguments[0]);
if (!first_argument->isInteger())
throw Exception(
fmt::format("Illegal type {} of first argument of function {}", first_argument->getName(), getName()),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

return std::make_shared<DataTypeString>();
}

template <typename T>
static void executeOneUIntOrInt(T xx, char *& out, bool skip_leading_zero = true, bool auto_close = true)
An-DJ marked this conversation as resolved.
Show resolved Hide resolved
An-DJ marked this conversation as resolved.
Show resolved Hide resolved
{
auto x = static_cast<Int64>(xx); // NOLINT
bool was_nonzero = false;
bool was_first_nonzero_byte = true;
for (int offset = (sizeof(Int64) - 1) * 8; offset >= 0; offset -= 8)
{
UInt8 byte = x >> offset;
/// Skip leading zeros
if (byte == 0 && !was_nonzero && offset && skip_leading_zero) //-V560
continue;
was_nonzero = true;
if (was_first_nonzero_byte)
{
out += writeNoZeroPrefixBinByte(byte, out);
was_first_nonzero_byte = false;
}
else
{
writeBinByte(byte, out);
out += word_size;
}
}
if (auto_close)
{
*out = '\0';
++out;
}
}

template <typename T>
bool tryExecuteUIntOrInt(const IColumn * col, ColumnPtr & col_res) const
{
auto * col_vec = checkAndGetColumn<ColumnVector<T>>(col);
static constexpr size_t MAX_LENGTH = sizeof(Int64) * word_size + 1; /// Including trailing zero byte.
if (col_vec)
{
auto col_str = ColumnString::create();
ColumnString::Chars_t & out_vec = col_str->getChars();
ColumnString::Offsets & out_offsets = col_str->getOffsets();
const typename ColumnVector<T>::Container & in_vec = col_vec->getData();
size_t size = in_vec.size();
out_offsets.resize(size);
out_vec.resize(size * (word_size + 1) + MAX_LENGTH); /// word_size+1 is length of one byte in hex/bin plus zero byte.
size_t pos = 0;
for (size_t i = 0; i < size; ++i)
{
/// Manual exponential growth, so as not to rely on the linear amortized work time of `resize` (no one guarantees it).
if (pos + MAX_LENGTH > out_vec.size())
out_vec.resize(out_vec.size() * word_size + MAX_LENGTH);
char * begin = reinterpret_cast<char *>(&out_vec[pos]);
char * end = begin;
executeOneUIntOrInt(in_vec[i], end);
pos += end - begin;
out_offsets[i] = pos;
}
out_vec.resize(pos);
col_res = std::move(col_str);
return true;
}
else
{
return false;
}
}

void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
{
const IColumn * column = block.getByPosition(arguments[0]).column.get();
ColumnPtr res_column;
if (tryExecuteUIntOrInt<UInt8>(column, res_column) || tryExecuteUIntOrInt<UInt16>(column, res_column) || tryExecuteUIntOrInt<UInt32>(column, res_column) || tryExecuteUIntOrInt<UInt64>(column, res_column) || tryExecuteUIntOrInt<Int8>(column, res_column) || tryExecuteUIntOrInt<Int16>(column, res_column) || tryExecuteUIntOrInt<Int32>(column, res_column) || tryExecuteUIntOrInt<Int64>(column, res_column))
An-DJ marked this conversation as resolved.
Show resolved Hide resolved
{
block.getByPosition(result).column = std::move(res_column);
return;
}
else
{
throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
}
}

private:
};

// clang-format off
struct NameEmpty { static constexpr auto name = "empty"; };
struct NameNotEmpty { static constexpr auto name = "notEmpty"; };
Expand Down Expand Up @@ -4994,5 +5111,6 @@ void registerFunctionsString(FunctionFactory & factory)
factory.registerFunction<FunctionSubStringIndex>();
factory.registerFunction<FunctionFormat>();
factory.registerFunction<FunctionFormatWithLocale>();
factory.registerFunction<FunctionBin>();
}
} // namespace DB
63 changes: 63 additions & 0 deletions dbms/src/Functions/tests/gtest_bin.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
// Copyright 2022 PingCAP, Ltd.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <TestUtils/FunctionTestUtils.h>
#include <TestUtils/TiFlashTestBasic.h>
namespace DB::tests
{
class TestBin : public DB::tests::FunctionTest
{
};

/// Smoke test: a small positive Int64 is rendered without leading zeros.
TEST_F(TestBin, Simple)
try
{
    const auto expected = createColumn<String>({"1100100"});
    const auto actual = executeFunction("bin", createColumn<Int64>({100}));
    ASSERT_COLUMN_EQ(expected, actual);
}
CATCH

/// Minimum and maximum of every supported integer width.
/// Expected strings are spelled as adjacent byte-sized literals (concatenated by the compiler)
/// so the sign extension of negative values to 64 bits is easy to see.
TEST_F(TestBin, Boundary)
try
{
    /// Signed types: the minimum is sign-extended to 64 bits, the maximum drops its leading zero.
    {
        const auto expected = createColumn<String>(
            {"11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "10000000",
             "1111111"});
        const auto actual = executeFunction("bin", createColumn<Int8>({INT8_MIN, INT8_MAX}));
        ASSERT_COLUMN_EQ(expected, actual);
    }
    {
        const auto expected = createColumn<String>(
            {"11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "10000000" "00000000",
             "1111111" "11111111"});
        const auto actual = executeFunction("bin", createColumn<Int16>({INT16_MIN, INT16_MAX}));
        ASSERT_COLUMN_EQ(expected, actual);
    }
    {
        const auto expected = createColumn<String>(
            {"11111111" "11111111" "11111111" "11111111" "10000000" "00000000" "00000000" "00000000",
             "1111111" "11111111" "11111111" "11111111"});
        const auto actual = executeFunction("bin", createColumn<Int32>({INT32_MIN, INT32_MAX}));
        ASSERT_COLUMN_EQ(expected, actual);
    }
    {
        const auto expected = createColumn<String>(
            {"10000000" "00000000" "00000000" "00000000" "00000000" "00000000" "00000000" "00000000",
             "1111111" "11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "11111111"});
        const auto actual = executeFunction("bin", createColumn<Int64>({INT64_MIN, INT64_MAX}));
        ASSERT_COLUMN_EQ(expected, actual);
    }

    /// Unsigned types: zero prints as "0", the maximum as one digit per bit of the type.
    {
        const auto expected = createColumn<String>({"0", "11111111"});
        const auto actual = executeFunction("bin", createColumn<UInt8>({0, 255}));
        ASSERT_COLUMN_EQ(expected, actual);
    }
    {
        const auto expected = createColumn<String>({"0", "11111111" "11111111"});
        const auto actual = executeFunction("bin", createColumn<UInt16>({0, 65535}));
        ASSERT_COLUMN_EQ(expected, actual);
    }
    {
        const auto expected = createColumn<String>({"0", "11111111" "11111111" "11111111" "11111111"});
        const auto actual = executeFunction("bin", createColumn<UInt32>({0, 4294967295}));
        ASSERT_COLUMN_EQ(expected, actual);
    }
    {
        const auto expected = createColumn<String>(
            {"0",
             "11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "11111111" "11111111"});
        const auto actual = executeFunction("bin", createColumn<UInt64>({0, ULLONG_MAX}));
        ASSERT_COLUMN_EQ(expected, actual);
    }
}
CATCH

} // namespace DB::tests
57 changes: 57 additions & 0 deletions tests/fullstack-test/expr/bin.test
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Copyright 2022 PingCAP, Ltd.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Signed BIGINT fixture: zero, a few small positive values, and the minimum and maximum
# 64-bit signed values.
mysql> drop table if exists test.t1;
mysql> create table test.t1(c1 bigint);
mysql> insert into test.t1 values(0);
mysql> insert into test.t1 values(1);
mysql> insert into test.t1 values(44);
mysql> insert into test.t1 values(100);
mysql> insert into test.t1 values(-9223372036854775808);
mysql> insert into test.t1 values(9223372036854775807);
An-DJ marked this conversation as resolved.
Show resolved Hide resolved
# Request a TiFlash replica for the signed table.
mysql> alter table test.t1 set tiflash replica 1;
# Unsigned BIGINT fixture: the same small values plus the maximum 64-bit unsigned value.
mysql> drop table if exists test.t2;
mysql> create table test.t2(c1 bigint unsigned);
mysql> insert into test.t2 values(0);
mysql> insert into test.t2 values(1);
mysql> insert into test.t2 values(44);
mysql> insert into test.t2 values(100);
mysql> insert into test.t2 values(18446744073709551615);
mysql> alter table test.t2 set tiflash replica 1;

# NOTE(review): presumably waits until the TiFlash replicas of both tables can serve reads - confirm against the test runner.
func> wait_table test t1
func> wait_table test t2

# Signed column, read from TiFlash only with MPP enforced. Expected: binary digits without
# leading zeros; the negative minimum shows its full 64-bit pattern.
mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select bin(c1) from test.t1;
+------------------------------------------------------------------+
| bin(c1) |
+------------------------------------------------------------------+
| 0 |
| 1 |
| 101100 |
| 1100100 |
| 1000000000000000000000000000000000000000000000000000000000000000 |
| 111111111111111111111111111111111111111111111111111111111111111 |
+------------------------------------------------------------------+
# Unsigned column, read from TiFlash only with MPP enforced. Expected: the maximum unsigned
# value prints as 64 one digits.
mysql> set @@tidb_isolation_read_engines='tiflash'; set @@tidb_enforce_mpp = 1; select bin(c1) from test.t2;
+------------------------------------------------------------------+
| bin(c1) |
+------------------------------------------------------------------+
| 0 |
| 1 |
| 101100 |
| 1100100 |
| 1111111111111111111111111111111111111111111111111111111111111111 |
+------------------------------------------------------------------+