Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow users to explicitly access LowCardinality<WrappedColumn> columns as WrappedColumn #79

Merged
merged 4 commits into from
Mar 24, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions clickhouse/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ SET ( clickhouse-cpp-lib-src
columns/ip4.cpp
columns/ip6.cpp
columns/lowcardinality.cpp
columns/lowcardinalityadaptor.h
columns/nullable.cpp
columns/numeric.cpp
columns/string.cpp
Expand Down
5 changes: 4 additions & 1 deletion clickhouse/client.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -455,6 +455,9 @@ bool Client::Impl::ReadBlock(Block* block, CodedInputStream* input) {
return false;
}

CreateColumnByTypeSettings create_column_settings;
create_column_settings.low_cardinality_as_wrapped_column = options_.backward_compatibility_lowcardinality_as_wrapped_column;

std::string name;
std::string type;
for (size_t i = 0; i < num_columns; ++i) {
Expand All @@ -465,7 +468,7 @@ bool Client::Impl::ReadBlock(Block* block, CodedInputStream* input) {
return false;
}

if (ColumnRef col = CreateColumnByType(type)) {
if (ColumnRef col = CreateColumnByType(type, create_column_settings)) {
if (num_rows && !col->Load(input, num_rows)) {
throw std::runtime_error("can't load");
}
Expand Down
8 changes: 8 additions & 0 deletions clickhouse/client.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,14 @@ struct ClientOptions {
// TCP options
DECLARE_FIELD(tcp_nodelay, bool, TcpNoDelay, true);

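/** When enabled, LowCardinality(String) and LowCardinality(FixedString) columns read from
* the server are created as the wrapped column type (e.g. ColumnString), while still being
* serialized/deserialized in the LowCardinality wire format.
*
* This eases migration of old codebases that can't afford to switch
* to using ColumnLowCardinalityT or ColumnLowCardinality directly,
* but still want to benefit from the smaller on-wire LowCardinality bandwidth footprint.
*
* @see LowCardinalitySerializationAdaptor, CreateColumnByType
*/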
DECLARE_FIELD(backward_compatibility_lowcardinality_as_wrapped_column, bool, SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn, true);

#undef DECLARE_FIELD
};

Expand Down
42 changes: 28 additions & 14 deletions clickhouse/columns/factory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
#include "ip4.h"
#include "ip6.h"
#include "lowcardinality.h"
#include "lowcardinalityadaptor.h"
#include "nothing.h"
#include "nullable.h"
#include "numeric.h"
Expand Down Expand Up @@ -94,17 +95,17 @@ static ColumnRef CreateTerminalColumn(const TypeAst& ast) {
}
}

static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
static ColumnRef CreateColumnFromAst(const TypeAst& ast, CreateColumnByTypeSettings settings) {
switch (ast.meta) {
case TypeAst::Array: {
return std::make_shared<ColumnArray>(
CreateColumnFromAst(ast.elements.front())
CreateColumnFromAst(ast.elements.front(), settings)
);
}

case TypeAst::Nullable: {
return std::make_shared<ColumnNullable>(
CreateColumnFromAst(ast.elements.front()),
CreateColumnFromAst(ast.elements.front(), settings),
std::make_shared<ColumnUInt8>()
);
}
Expand All @@ -118,7 +119,7 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {

columns.reserve(ast.elements.size());
for (const auto& elem : ast.elements) {
if (auto col = CreateColumnFromAst(elem)) {
if (auto col = CreateColumnFromAst(elem, settings)) {
columns.push_back(col);
} else {
return nullptr;
Expand Down Expand Up @@ -151,14 +152,27 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
}
case TypeAst::LowCardinality: {
const auto nested = ast.elements.front();
switch (nested.code) {
// TODO (nemkov): update this to maximize code reuse.
case Type::String:
return std::make_shared<ColumnLowCardinalityT<ColumnString>>();
case Type::FixedString:
return std::make_shared<ColumnLowCardinalityT<ColumnFixedString>>(nested.elements.front().value);
default:
throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported");
if (settings.low_cardinality_as_wrapped_column) {
switch (nested.code) {
// TODO (nemkov): update this to maximize code reuse.
case Type::String:
return std::make_shared<LowCardinalitySerializationAdaptor<ColumnString>>();
case Type::FixedString:
return std::make_shared<LowCardinalitySerializationAdaptor<ColumnFixedString>>(nested.elements.front().value);
default:
throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported");
}
}
else {
switch (nested.code) {
// TODO (nemkov): update this to maximize code reuse.
case Type::String:
return std::make_shared<ColumnLowCardinalityT<ColumnString>>();
case Type::FixedString:
return std::make_shared<ColumnLowCardinalityT<ColumnFixedString>>(nested.elements.front().value);
default:
throw std::runtime_error("LowCardinality(" + nested.name + ") is not supported");
}
}
}
case TypeAst::SimpleAggregateFunction: {
Expand All @@ -178,10 +192,10 @@ static ColumnRef CreateColumnFromAst(const TypeAst& ast) {
} // namespace


ColumnRef CreateColumnByType(const std::string& type_name) {
ColumnRef CreateColumnByType(const std::string& type_name, CreateColumnByTypeSettings settings) {
auto ast = ParseTypeName(type_name);
if (ast != nullptr) {
return CreateColumnFromAst(*ast);
return CreateColumnFromAst(*ast, settings);
}

return nullptr;
Expand Down
7 changes: 6 additions & 1 deletion clickhouse/columns/factory.h
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,11 @@

namespace clickhouse {

ColumnRef CreateColumnByType(const std::string& type_name);
/// Knobs that tune how CreateColumnByType() builds a column from a type name.
struct CreateColumnByTypeSettings {
    /// When true, LowCardinality(String) / LowCardinality(FixedString) type names
    /// produce a LowCardinalitySerializationAdaptor over the wrapped column type
    /// instead of a ColumnLowCardinalityT. Off by default.
    bool low_cardinality_as_wrapped_column{false};
};

ColumnRef CreateColumnByType(const std::string& type_name, CreateColumnByTypeSettings settings = {});

}
7 changes: 6 additions & 1 deletion clickhouse/columns/lowcardinality.h
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,12 @@ class ColumnLowCardinalityT : public ColumnLowCardinality {

template <typename ...Args>
explicit ColumnLowCardinalityT(Args &&... args)
: ColumnLowCardinality(std::make_shared<DictionaryColumnType>(std::forward<Args>(args)...)),
: ColumnLowCardinalityT(std::make_shared<DictionaryColumnType>(std::forward<Args>(args)...))
{}

// Create LC<T> column from existing T-column, making a deep copy of all contents.
explicit ColumnLowCardinalityT(std::shared_ptr<DictionaryColumnType> dictionary_col)
: ColumnLowCardinality(dictionary_col),
typed_dictionary_(dynamic_cast<DictionaryColumnType &>(*GetDictionary())),
type_(typed_dictionary_.Type()->GetCode())
{}
Expand Down
54 changes: 54 additions & 0 deletions clickhouse/columns/lowcardinalityadaptor.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#pragma once

#include "column.h"
#include "lowcardinality.h"

#include <cassert>

namespace clickhouse {

class CodedOutputStream;
class CodedInputStream;

/** Adapts any ColumnType to be serialized/deserialized as LowCardinality,
* and to be castable to ColumnType via ColumnPtr->As<ColumnType>().
*
* The adaptor derives from AdaptedColumnType and inherits its constructors, so it is
* constructed exactly like the wrapped column; only Load() and Save() are overridden
* to go through a temporary ColumnLowCardinalityT<AdaptedColumnType>.
*
* It helps to ease migration of old codebases, which can't afford to switch
* to using ColumnLowCardinalityT or ColumnLowCardinality directly,
* but still want to benefit from the smaller on-wire LowCardinality bandwidth footprint.
*
* Not intended to be used by users directly.
*
* @see ClientOptions, CreateColumnByType
*/
template <typename AdaptedColumnType>
class LowCardinalitySerializationAdaptor : public AdaptedColumnType
{
public:
// Reuse every constructor of the wrapped column type.
using AdaptedColumnType::AdaptedColumnType;

/// Loads column data from input stream.
///
/// Reads `rows` values in LowCardinality format via a temporary
/// ColumnLowCardinalityT, copies them one by one into a fresh column of the
/// wrapped type and swaps that column's contents into this object.
/// Returns false (leaving this column untouched) if the LowCardinality load fails.
bool Load(CodedInputStream* input, size_t rows) override {
// Empty slice: a zero-row column of the wrapped type to collect the loaded values.
auto new_data_column = this->Slice(0, 0)->template As<AdaptedColumnType>();

ColumnLowCardinalityT<AdaptedColumnType> low_cardinality_col(new_data_column);
if (!low_cardinality_col.Load(input, rows))
return false;

// It is safe to reuse `new_data_column` as the destination below, since ColumnLowCardinalityT
// is documented to make a deep copy of the column it is built from; still check just in case.
assert(new_data_column->Size() == 0);

// Expand the loaded LowCardinality column into plain values, row by row.
for (size_t i = 0; i < low_cardinality_col.Size(); ++i)
new_data_column->Append(low_cardinality_col[i]);

// Replace this column's contents with the freshly loaded values.
this->Swap(*new_data_column);
return true;
}

/// Saves column data to output stream.
///
/// Builds a temporary ColumnLowCardinalityT from this column's current contents
/// and delegates serialization to it.
void Save(CodedOutputStream* output) override {
ColumnLowCardinalityT<AdaptedColumnType>(this->template As<AdaptedColumnType>()).Save(output);
}
};

}
48 changes: 48 additions & 0 deletions ut/client_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,54 @@ TEST_P(ClientCase, LowCardinality_InsertAfterClear) {
ASSERT_EQ(total_rows, data.size());
}

TEST_P(ClientCase, LowCardinalityString_AsString) {
    // Validate that LowCardinality(String) column values can be INSERTed from client as ColumnString
    // and also read on client (enabled by special option) as ColumnString.

    ClientOptions options = GetParam();
    options.SetBakcwardCompatibilityFeatureLowCardinalityAsWrappedColumn(true);

    // The client must be built from the adjusted `options`, not from the raw test
    // parameter; otherwise the explicitly enabled feature is never applied.
    client_ = std::make_unique<Client>(options);
    client_->Execute("CREATE DATABASE IF NOT EXISTS test_clickhouse_cpp");

    Block block;
    auto col = std::make_shared<ColumnString>();

    client_->Execute("DROP TABLE IF EXISTS " + table_name + ";");
    client_->Execute("CREATE TABLE IF NOT EXISTS " + table_name + "( " + column_name + " LowCardinality(String) )"
            "ENGINE = Memory");

    block.AppendColumn("test_column", col);

    const std::vector<std::string> data{{"FooBar", "1", "2", "Foo", "4", "Bar", "Foo", "7", "8", "Foo"}};
    for (const auto & v : data)
        col->Append(v);

    block.RefreshRowCount();
    client_->Insert(table_name, block);

    // Now we can access the data via ColumnString instead of ColumnLowCardinalityT<ColumnString>.
    size_t total_rows = 0;
    client_->Select(getOneColumnSelectQuery(),
        [&total_rows, &data](const Block& block) {
            total_rows += block.GetRowCount();
            if (block.GetRowCount() == 0) {
                return;
            }

            ASSERT_EQ(1U, block.GetColumnCount());

            // A failed cast means the feature under test is broken, so fail loudly
            // instead of silently skipping the per-value checks.
            auto col = block[0]->As<ColumnString>();
            ASSERT_TRUE(col != nullptr) << "LowCardinality(String) column is not accessible as ColumnString";

            ASSERT_EQ(data.size(), col->Size());
            for (size_t i = 0; i < col->Size(); ++i) {
                EXPECT_EQ(data[i], (*col)[i]) << " at index: " << i;
            }
        }
    );

    ASSERT_EQ(total_rows, data.size());
}

TEST_P(ClientCase, Generic) {
client_->Execute(
"CREATE TABLE IF NOT EXISTS test_clickhouse_cpp.client (id UInt64, name String) "
Expand Down
12 changes: 12 additions & 0 deletions ut/columns_ut.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -631,6 +631,18 @@ TEST(ColumnsCase, UnmatchedBrackets) {
ASSERT_EQ(nullptr, CreateColumnByType("Array(LowCardinality(Nullable(FixedString(10000)))"));
}

// With low_cardinality_as_wrapped_column enabled, LowCardinality(T) type names must
// yield columns that report T's type code, both directly and after casting to T's column class.
TEST(ColumnsCase, LowCardinalityAsWrappedColumn) {
    CreateColumnByTypeSettings wrapped_settings;
    wrapped_settings.low_cardinality_as_wrapped_column = true;

    // Every check creates its own fresh column with the wrapped-column setting applied.
    const auto make_column = [&wrapped_settings](const std::string& type_name) {
        return CreateColumnByType(type_name, wrapped_settings);
    };

    // LowCardinality(String) behaves as a String column.
    ASSERT_EQ(Type::String, make_column("LowCardinality(String)")->GetType().GetCode());
    ASSERT_EQ(Type::String, make_column("LowCardinality(String)")->As<ColumnString>()->GetType().GetCode());

    // LowCardinality(FixedString(N)) behaves as a FixedString column.
    ASSERT_EQ(Type::FixedString, make_column("LowCardinality(FixedString(10000))")->GetType().GetCode());
    ASSERT_EQ(Type::FixedString, make_column("LowCardinality(FixedString(10000))")->As<ColumnFixedString>()->GetType().GetCode());
}


class ColumnsCaseWithName : public ::testing::TestWithParam<const char* /*Column Type String*/>
{};

Expand Down