Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the performance of length and ascii functions (#9345) #9400

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion dbms/src/Functions/FunctionsNull.h
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ class FunctionCoalesce : public IFunction
public:
static constexpr auto name = "coalesce";
static FunctionPtr create(const Context & context);
FunctionCoalesce(const Context & context)
explicit FunctionCoalesce(const Context & context)
: context(context)
{}

Expand Down
60 changes: 31 additions & 29 deletions dbms/src/Functions/FunctionsString.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
// limitations under the License.

#include <Columns/ColumnArray.h>
#include <Columns/ColumnsNumber.h>
#include <Columns/IColumn.h>
#include <Common/TargetSpecific.h>
#include <Common/UTF8Helpers.h>
#include <Common/Volnitsky.h>
Expand Down Expand Up @@ -4127,9 +4129,11 @@ class FunctionASCII : public IFunction
std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }

bool useDefaultImplementationForConstants() const override { return true; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 1)
if unlikely (arguments.size() != 1)
throw Exception(
fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
Expand All @@ -4140,28 +4144,25 @@ class FunctionASCII : public IFunction
// Builds an Int64 result column from the string column at arguments[0] and stores it
// at position `result` of the block. Throws ILLEGAL_TYPE_OF_ARGUMENT when the
// argument column is not of the expected column type.
// NOTE(review): this span appears to be a diff rendering in which removed and added
// lines are interleaved (e.g. `val_num` is declared twice and there are two loops) —
// confirm which lines are live before editing.
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
{
const IColumn * c0_col = block.getByPosition(arguments[0]).column.get();
const auto * c0_const = checkAndGetColumn<ColumnConst>(c0_col);
const auto * c0_string = checkAndGetColumn<ColumnString>(c0_col);
if unlikely (c0_string == nullptr)
throw Exception(
fmt::format("Illegal argument of function {}", getName()),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

Field res_field;
int val_num = c0_col->size();
auto val_num = static_cast<ssize_t>(c0_col->size());
auto col_res = ColumnInt64::create();
col_res->reserve(val_num);
if (c0_const == nullptr && c0_string == nullptr)
throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
ColumnInt64::Container & data = col_res->getData();
data.resize(val_num);

// Per-row variant: materializes each row as a String through a Field and takes
// its first character, or 0 when the row is empty.
for (int i = 0; i < val_num; i++)
{
c0_col->get(i, res_field);
String handled_str = res_field.get<String>();
Int64 res = handled_str.empty() ? 0 : static_cast<Int64>(handled_str[0]);
col_res->insert(res);
}
const auto & chars = c0_string->getChars();
const auto & offsets = c0_string->getOffsets();

// Offset-based variant: reads one byte per row directly from the column's buffers.
// NOTE(review): offsets[i - 1] is presumably the start of row i inside `chars`; at
// i == 0 this indexes offsets[-1], which looks like it relies on the offsets
// container being readable at -1 and yielding 0 — confirm. `i` is a signed ssize_t,
// so `i - 1` is -1 rather than a wrapped-around unsigned value.
// NOTE(review): for an empty row this presumably reads a terminating zero byte and
// so yields 0 — confirm against the string column layout.
for (ssize_t i = 0; i < val_num; i++)
data[i] = chars[offsets[i - 1]];

block.getByPosition(result).column = std::move(col_res);
}

private:
};

class FunctionLength : public IFunction
Expand All @@ -4178,9 +4179,11 @@ class FunctionLength : public IFunction
std::string getName() const override { return name; }
size_t getNumberOfArguments() const override { return 1; }

bool useDefaultImplementationForConstants() const override { return true; }

DataTypePtr getReturnTypeImpl(const DataTypes & arguments) const override
{
if (arguments.size() != 1)
if unlikely (arguments.size() != 1)
throw Exception(
fmt::format("Number of arguments for function {} doesn't match: passed {}, should be 1.", getName(), arguments.size()),
ErrorCodes::NUMBER_OF_ARGUMENTS_DOESNT_MATCH);
Expand All @@ -4191,22 +4194,21 @@ class FunctionLength : public IFunction
// Builds an Int64 result column holding the size of each row of the string column at
// arguments[0] and stores it at position `result` of the block. Throws
// ILLEGAL_TYPE_OF_ARGUMENT when the argument column is not of the expected column type.
// NOTE(review): this span appears to be a diff rendering in which removed and added
// lines are interleaved (e.g. `val_num` is declared twice and there are two loops) —
// confirm which lines are live before editing.
void executeImpl(Block & block, const ColumnNumbers & arguments, size_t result) const override
{
const IColumn * c0_col = block.getByPosition(arguments[0]).column.get();
const auto * c0_const = checkAndGetColumn<ColumnConst>(c0_col);
const auto * c0_string = checkAndGetColumn<ColumnString>(c0_col);
if unlikely (c0_string == nullptr)
throw Exception(
fmt::format("Illegal argument of function {}", getName()),
ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);

Field res_field;
int val_num = c0_col->size();
auto val_num = static_cast<ssize_t>(c0_col->size());
auto col_res = ColumnInt64::create();
col_res->reserve(val_num);
if (c0_const == nullptr && c0_string == nullptr)
throw Exception(fmt::format("Illegal argument of function {}", getName()), ErrorCodes::ILLEGAL_TYPE_OF_ARGUMENT);
ColumnInt64::Container & data = col_res->getData();
data.resize(val_num);

// Per-row variant: materializes each row as a String through a Field and inserts
// its size().
for (int i = 0; i < val_num; i++)
{
c0_col->get(i, res_field);
String handled_str = res_field.get<String>();
col_res->insert(static_cast<Int64>(handled_str.size()));
}
const auto & offsets = c0_string->getOffsets();

// Offset-based variant: derives each row's size from adjacent offsets only.
// NOTE(review): at i == 0 this indexes offsets[-1], which looks like it relies on
// the offsets container being readable at -1 and yielding 0 — confirm. `i` is a
// signed ssize_t, so `i - 1` is -1 rather than a wrapped-around unsigned value.
// NOTE(review): the trailing `- 1` presumably excludes a per-row terminating byte
// stored with each string — TODO confirm against the string column layout.
for (ssize_t i = 0; i < val_num; i++)
data[i] = offsets[i] - offsets[i - 1] - 1;

block.getByPosition(result).column = std::move(col_res);
}
Expand Down
33 changes: 17 additions & 16 deletions dbms/src/Functions/GatherUtils/Algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -187,7 +187,7 @@ void concat(const std::vector<std::unique_ptr<IArraySource>> & array_sources, Si
size_t sources_num = array_sources.size();
std::vector<char> is_const(sources_num);

auto checkAndGetSizeToReserve = [](auto source, IArraySource * array_source) {
auto check_and_get_size_to_reserve = [](auto source, IArraySource * array_source) {
if (source == nullptr)
throw Exception("Concat function expected " + demangle(typeid(Source).name()) + " or "
+ demangle(typeid(ConstSource<Source>).name()) + " but got "
Expand All @@ -199,17 +199,18 @@ void concat(const std::vector<std::unique_ptr<IArraySource>> & array_sources, Si
size_t size_to_reserve = 0;
for (auto i : ext::range(0, sources_num))
{
auto & source = array_sources[i];
const auto & source = array_sources[i];
is_const[i] = source->isConst();
if (is_const[i])
size_to_reserve += checkAndGetSizeToReserve(typeid_cast<ConstSource<Source> *>(source.get()), source.get());
size_to_reserve
+= check_and_get_size_to_reserve(typeid_cast<ConstSource<Source> *>(source.get()), source.get());
else
size_to_reserve += checkAndGetSizeToReserve(typeid_cast<Source *>(source.get()), source.get());
size_to_reserve += check_and_get_size_to_reserve(typeid_cast<Source *>(source.get()), source.get());
}

sink.reserve(size_to_reserve);

auto writeNext = [&sink](auto source) {
auto write_next = [&sink](auto source) {
writeSlice(source->getWhole(), sink);
source->next();
};
Expand All @@ -218,11 +219,11 @@ void concat(const std::vector<std::unique_ptr<IArraySource>> & array_sources, Si
{
for (auto i : ext::range(0, sources_num))
{
auto & source = array_sources[i];
const auto & source = array_sources[i];
if (is_const[i])
writeNext(static_cast<ConstSource<Source> *>(source.get()));
write_next(static_cast<ConstSource<Source> *>(source.get()));
else
writeNext(static_cast<Source *>(source.get()));
write_next(static_cast<Source *>(source.get()));
}
sink.next();
}
Expand Down Expand Up @@ -383,11 +384,11 @@ void NO_INLINE pad(SourceA && src, SourceB && padding, Sink && sink, ssize_t len
size_t left = static_cast<size_t>(length) - slice.size;
if (is_left)
{
StringSource::Slice padSlice = padding.getWhole();
while (left > padSlice.size && padSlice.size != 0)
StringSource::Slice pad_slice = padding.getWhole();
while (left > pad_slice.size && pad_slice.size != 0)
{
writeSlice(padSlice, sink);
left -= padSlice.size;
writeSlice(pad_slice, sink);
left -= pad_slice.size;
}

writeSlice(padding.getSliceFromLeft(0, left), sink);
Expand All @@ -396,11 +397,11 @@ void NO_INLINE pad(SourceA && src, SourceB && padding, Sink && sink, ssize_t len
else
{
writeSlice(slice, sink);
StringSource::Slice padSlice = padding.getWhole();
while (left > padSlice.size && padSlice.size != 0)
StringSource::Slice pad_slice = padding.getWhole();
while (left > pad_slice.size && pad_slice.size != 0)
{
writeSlice(padSlice, sink);
left -= padSlice.size;
writeSlice(pad_slice, sink);
left -= pad_slice.size;
}

writeSlice(padding.getSliceFromLeft(0, left), sink);
Expand Down
65 changes: 65 additions & 0 deletions dbms/src/Functions/tests/bench_collation.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,12 @@
#include <TestUtils/FunctionTestUtils.h>
#include <benchmark/benchmark.h>

/// this is a hack, include the cpp files so we can test their functions (e.g. MatchImpl) directly
#include <Functions/FunctionsString.cpp> // NOLINT
#include <Functions/FunctionsStringSearch.cpp> // NOLINT

namespace DB
Expand Down Expand Up @@ -146,5 +151,65 @@ BENCH_LIKE_COLLATOR(ASCII_BIN);
BENCH_LIKE_COLLATOR(BINARY);
BENCH_LIKE_COLLATOR(LATIN1_BIN);

/// Benchmark fixture that holds the input columns for the LengthBench benchmark.
class LengthBench : public benchmark::Fixture
{
public:
using ColStringType = typename TypeTraits<String>::FieldType;

// Three inputs, each a column named "col" built from data_num copies of one string
// (data_num is defined outside this block).
// data1: the empty string.
ColumnsWithTypeAndName data1{toVec<String>("col", std::vector<ColStringType>(data_num, ""))};
// data2: ten ASCII 'a' characters.
ColumnsWithTypeAndName data2{toVec<String>("col", std::vector<ColStringType>(data_num, "aaaaaaaaaa"))};
// data3: one non-ASCII character followed by eight 'a' characters.
ColumnsWithTypeAndName data3{toVec<String>("col", std::vector<ColStringType>(data_num, "啊aaaaaaaa"))};

// Intentionally empty: the inputs are built by the member initializers above.
void SetUp(const benchmark::State &) override {}
};

/// Benchmark: calls FunctionLength::executeImpl on each of the fixture's three input
/// blocks (empty strings, ASCII strings, strings starting with a non-ASCII character)
/// once per benchmark iteration.
BENCHMARK_DEFINE_F(LengthBench, bench)
(benchmark::State & state)
try
{
    FunctionLength function_length;
    std::vector<Block> blocks{Block(data1), Block(data2), Block(data3)};
    // Declare the result slot as Int64 so that its data type agrees with the
    // ColumnInt64 that FunctionLength::executeImpl stores into it.
    for (auto & block : blocks)
        block.insert({nullptr, std::make_shared<DataTypeNumber<Int64>>(), "res"});
    const ColumnNumbers arguments{0};
    // Index of the "res" result column inserted above (the input column is at 0).
    constexpr size_t result_pos = 1;
    for (auto _ : state)
    {
        for (auto & block : blocks)
            function_length.executeImpl(block, arguments, result_pos);
    }
}
CATCH
BENCHMARK_REGISTER_F(LengthBench, bench)->Iterations(10);

/// Benchmark fixture that holds the input columns for the ASCIIBench benchmark.
class ASCIIBench : public benchmark::Fixture
{
public:
using ColStringType = typename TypeTraits<String>::FieldType;

// Three inputs, each a column named "col" built from data_num copies of one string
// (data_num is defined outside this block).
// data1: the empty string.
ColumnsWithTypeAndName data1{toVec<String>("col", std::vector<ColStringType>(data_num, ""))};
// data2: ten ASCII 'a' characters.
ColumnsWithTypeAndName data2{toVec<String>("col", std::vector<ColStringType>(data_num, "aaaaaaaaaa"))};
// data3: one non-ASCII character followed by eight 'a' characters.
ColumnsWithTypeAndName data3{toVec<String>("col", std::vector<ColStringType>(data_num, "啊aaaaaaaa"))};

// Intentionally empty: the inputs are built by the member initializers above.
void SetUp(const benchmark::State &) override {}
};

/// Benchmark: calls FunctionASCII::executeImpl on each of the fixture's three input
/// blocks (empty strings, ASCII strings, strings starting with a non-ASCII character)
/// once per benchmark iteration.
BENCHMARK_DEFINE_F(ASCIIBench, bench)
(benchmark::State & state)
try
{
    FunctionASCII function_ascii;
    std::vector<Block> blocks{Block(data1), Block(data2), Block(data3)};
    // Declare the result slot as Int64 so that its data type agrees with the
    // ColumnInt64 that FunctionASCII::executeImpl stores into it.
    for (auto & block : blocks)
        block.insert({nullptr, std::make_shared<DataTypeNumber<Int64>>(), "res"});
    const ColumnNumbers arguments{0};
    // Index of the "res" result column inserted above (the input column is at 0).
    constexpr size_t result_pos = 1;
    for (auto _ : state)
    {
        for (auto & block : blocks)
            function_ascii.executeImpl(block, arguments, result_pos);
    }
}
CATCH
BENCHMARK_REGISTER_F(ASCIIBench, bench)->Iterations(10);

} // namespace tests
} // namespace DB
Loading