Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

ADD BE UDF bitmap_and and bitmap_or #2649

Closed
wants to merge 26 commits into from
Closed
Show file tree
Hide file tree
Changes from 12 commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
f154026
ADD BE BitMap UDF and & or
Jan 3, 2020
c098178
[Index] Implements create drop show index syntax for bitmap index [#2…
yangzhg Jan 3, 2020
5dff936
Fix HLL_UNION_AGG AnalyticFn result in BE core by adding hll_get_valu…
kangkaisen Jan 3, 2020
458ed55
Fix BITMAP_UNION_COUNT couldn't hit rollup table (#2655)
kangkaisen Jan 3, 2020
42dfe13
Add filter conditions for 'show partitions from table' syntax (#2553)
caiconghui Jan 3, 2020
1ca8212
Fix doris be compile error for Ubuntu14.04 (#2647)
caiconghui Jan 3, 2020
af9529a
[Dynamic Partition] Support for automatically adding partitions
WingsGo Jan 3, 2020
1648226
Adapt arrow 0.15 API (#2657)
imay Jan 4, 2020
220ed84
[Unit Test]Fix Schema Change Test Case (#2659)
WingsGo Jan 5, 2020
e1e6830
ADD BE BitMap UDF and & or
Jan 3, 2020
2699cb1
Merge branch 'be_bitmap_udf-0.1' of github.com:DanyBin/incubator-dori…
Jan 6, 2020
8a5ee6a
Fix FE couldn't start (#2662)
kangkaisen Jan 6, 2020
87a5007
Fix bug: parquet scanner don't seek (#2661)
imay Jan 6, 2020
7f148c1
[Build]Make set target arch universal (#2660)
WingsGo Jan 6, 2020
2f990c0
fix bitmap udf
Jan 6, 2020
60ad66b
ADD BE BitMap UDF and & or
Jan 3, 2020
5ebbeb4
fix bitmap udf
Jan 6, 2020
a54a7c0
a
Jan 6, 2020
76ad4ac
Merge branch 'be_bitmap_udf-0.1' of github.com:DanyBin/incubator-dori…
Jan 6, 2020
c6e261d
doc
Jan 6, 2020
80d0d3e
Fix bug: CreateIndexClause can be casted to AlterTableClause (#2667)
songenjie Jan 6, 2020
7ab479a
Fix incompatibility with arm architecture in util and gutil (#2650)
yangzhg Jan 6, 2020
b14a1cf
Add nio support for mysql protocol implementation (#2603)
infearOnTheWay Jan 6, 2020
9f7b253
ADD BE BitMap UDF and & or
Jan 3, 2020
29e0abc
ADD BE BitMap UDF and & or
Jan 3, 2020
6784143
fix bitmap udf
Jan 6, 2020
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
22 changes: 19 additions & 3 deletions be/src/exec/parquet_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,13 @@ ParquetReaderWrap::~ParquetReaderWrap() {
Status ParquetReaderWrap::init_parquet_reader(const std::vector<SlotDescriptor*>& tuple_slot_descs) {
try {
// new file reader for parquet file
_reader.reset(new parquet::arrow::FileReader(arrow::default_memory_pool(),
std::move(parquet::ParquetFileReader::Open(_parquet, _properties))));
auto st = parquet::arrow::FileReader::Make(arrow::default_memory_pool(),
parquet::ParquetFileReader::Open(_parquet, _properties),
&_reader);
if (!st.ok()) {
LOG(WARNING) << "failed to create parquet file reader, errmsg=" << st.ToString();
return Status::InternalError("Failed to create file reader");
}

_file_metadata = _reader->parquet_reader()->metadata();
// initial members
Expand Down Expand Up @@ -511,6 +516,7 @@ arrow::Status ParquetFile::ReadAt(int64_t position, int64_t nbytes, int64_t* byt
position += reads;
out = (char*)out + reads;
}
_pos += *bytes_read;
return arrow::Status::OK();
}

Expand All @@ -531,7 +537,17 @@ arrow::Status ParquetFile::Tell(int64_t* position) const {
}

arrow::Status ParquetFile::Read(int64_t nbytes, std::shared_ptr<arrow::Buffer>* out) {
return arrow::Status::NotImplemented("Not Supported.");
std::shared_ptr<arrow::Buffer> read_buf;
ARROW_RETURN_NOT_OK(arrow::AllocateBuffer(arrow::default_memory_pool(), nbytes, &read_buf));
int64_t bytes_read = 0;
ARROW_RETURN_NOT_OK(ReadAt(_pos, nbytes, &bytes_read, read_buf->mutable_data()));
// If bytes_read is equal with read_buf's capacity, we just assign
if (bytes_read == nbytes) {
*out = std::move(read_buf);
} else {
*out = arrow::SliceBuffer(read_buf, 0, bytes_read);
}
return arrow::Status::OK();
}

}
1 change: 1 addition & 0 deletions be/src/exec/parquet_reader.h
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ class ParquetFile : public arrow::io::RandomAccessFile {
bool closed() const override;
private:
FileReader *_file;
int64_t _pos = 0;
};

// Reader of broker parquet file
Expand Down
45 changes: 45 additions & 0 deletions be/src/exprs/bitmap_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,6 +388,51 @@ BigIntVal BitmapFunctions::bitmap_intersect_finalize(FunctionContext* ctx, const
return result;
}

StringVal BitmapFunctions::bitmap_or(FunctionContext* ctx, const StringVal& src, const StringVal& dst){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

src and dst can be null, you should handle this case.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK. I will update doc and fix this case.

RoaringBitmap bitmap;
if(!src.is_null){
if(src.len == 0 ){
bitmap.merge(*reinterpret_cast<RoaringBitmap*>(src.ptr));
} else{
bitmap.merge(RoaringBitmap ((char*)src.ptr));
}
}

if(!dst.is_null){
if(dst.len == 0){
bitmap.merge(*reinterpret_cast<RoaringBitmap*>(dst.ptr));
} else{
bitmap.merge(RoaringBitmap ((char*)dst.ptr));
}
}

StringVal result(ctx,bitmap.size());
bitmap.serialize((char*)result.ptr);
return result;
}
StringVal BitmapFunctions::bitmap_and(FunctionContext* ctx, const StringVal& src, const StringVal& dst){
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is the third parameter called ‘dst’ appropriate?

RoaringBitmap bitmap;
if(!src.is_null){
if(src.len == 0 ){
bitmap.merge(*reinterpret_cast<RoaringBitmap*>(src.ptr));
} else{
bitmap.merge(RoaringBitmap ((char*)src.ptr));
}
}

if(!dst.is_null){
if(dst.len == 0){
bitmap.intersect(*reinterpret_cast<RoaringBitmap*>(dst.ptr));
} else{
bitmap.intersect(RoaringBitmap ((char*)dst.ptr));
}
}

StringVal result(ctx,bitmap.size());
bitmap.serialize((char*)result.ptr);
return result;
}


template void BitmapFunctions::bitmap_update_int<TinyIntVal>(
FunctionContext* ctx, const TinyIntVal& src, StringVal* dst);
Expand Down
2 changes: 2 additions & 0 deletions be/src/exprs/bitmap_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,8 @@ class BitmapFunctions {
static StringVal to_bitmap(FunctionContext* ctx, const StringVal& src);
static StringVal bitmap_hash(FunctionContext* ctx, const StringVal& src);

static StringVal bitmap_or(FunctionContext* ctx, const StringVal& src,const StringVal& dst);
static StringVal bitmap_and(FunctionContext* ctx, const StringVal& src,const StringVal& dst);

// bitmap_intersect
template<typename T, typename ValType>
Expand Down
6 changes: 6 additions & 0 deletions be/src/exprs/hll_function.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,12 @@ BigIntVal HllFunctions::hll_finalize(FunctionContext*, const StringVal &src) {
return result;
}

BigIntVal HllFunctions::hll_get_value(FunctionContext*, const StringVal &src) {
auto* src_hll = reinterpret_cast<HyperLogLog*>(src.ptr);
BigIntVal result(src_hll->estimate_cardinality());
return result;
}

BigIntVal HllFunctions::hll_cardinality(FunctionContext* ctx, const StringVal& input) {
if (input.is_null) {
return BigIntVal::null();
Expand Down
4 changes: 4 additions & 0 deletions be/src/exprs/hll_function.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ class HllFunctions {

static BigIntVal hll_finalize(FunctionContext*, const StringVal& src);

// Get the hll cardinality, the difference from hll_finalize method is
// hll_get_value method doesn't free memory
static BigIntVal hll_get_value(FunctionContext*, const StringVal& src);

static StringVal hll_serialize(FunctionContext* ctx, const StringVal& src);

static BigIntVal hll_cardinality(FunctionContext* ctx, const StringVal& src);
Expand Down
6 changes: 6 additions & 0 deletions be/src/olap/olap_snapshot_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -335,6 +335,9 @@ OLAPStatus OlapSnapshotConverter::to_column_pb(const ColumnMessage& column_msg,
if (column_msg.has_is_bf_column()) {
column_pb->set_is_bf_column(column_msg.is_bf_column());
}
if (column_msg.has_has_bitmap_index()) {
column_pb->set_has_bitmap_index(column_msg.has_bitmap_index());
}
// TODO(ygl) calculate column id from column list
// column_pb->set_referenced_column_id(column_msg.());

Expand Down Expand Up @@ -388,6 +391,9 @@ OLAPStatus OlapSnapshotConverter::to_column_msg(const ColumnPB& column_pb, Colum
if (column_pb.has_is_bf_column()) {
column_msg->set_is_bf_column(column_pb.is_bf_column());
}
if (column_pb.has_has_bitmap_index()) {
column_msg->set_has_bitmap_index(column_pb.has_bitmap_index());
}
column_msg->set_is_root_column(true);
return OLAP_SUCCESS;
}
Expand Down
1 change: 1 addition & 0 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -520,6 +520,7 @@ Status SegmentIterator::next_batch(RowBlockV2* block) {
column_predicate->evaluate(&column_block, block->selection_vector(), &selected_size);
}
block->set_selected_size(selected_size);
block->set_num_rows(selected_size);
_opts.stats->rows_vec_cond_filtered += original_size - selected_size;
}

Expand Down
15 changes: 11 additions & 4 deletions be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,10 +62,6 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec) {
if (column.is_key()) {
opts.need_zone_map = true;
}
// TODO set opts.need_bitmap_index based on table properties
if (!column.is_key()) {
opts.need_bitmap_index = _opts.need_bitmap_index;
}
if (column.is_bf_column()) {
opts.need_bloom_filter = true;
if ((column.aggregation() == OLAP_FIELD_AGGREGATION_REPLACE
Expand All @@ -77,6 +73,17 @@ Status SegmentWriter::init(uint32_t write_mbytes_per_sec) {
opts.need_bloom_filter = false;
}
}
if (column.has_bitmap_index()) {
opts.need_bitmap_index = true;
if ((column.aggregation() == OLAP_FIELD_AGGREGATION_REPLACE
|| column.aggregation() == OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL)
&& !_opts.whether_to_filter_value) {
// if the column's Aggregation type is OLAP_FIELD_AGGREGATION_REPLACE or
// OLAP_FIELD_AGGREGATION_REPLACE_IF_NOT_NULL and the segment is not in base rowset,
// do not write the bitmap index because it is useless
opts.need_bitmap_index = false;
}
}

std::unique_ptr<Field> field(FieldFactory::create(column));
DCHECK(field.get() != nullptr);
Expand Down
2 changes: 0 additions & 2 deletions be/src/olap/rowset/segment_v2/segment_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -44,8 +44,6 @@ extern const uint32_t k_segment_magic_length;

struct SegmentWriterOptions {
uint32_t num_rows_per_block = 1024;
// Todo(kks): only for UT, we should remove it when we support bitmap_index in FE
bool need_bitmap_index = false;
// whether to filter value column against bloom filter/zone map
bool whether_to_filter_value = false;
};
Expand Down
4 changes: 4 additions & 0 deletions be/src/olap/schema_change.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1931,6 +1931,10 @@ OLAPStatus SchemaChangeHandler::_parse_request(TabletSharedPtr base_tablet,
!= ref_tablet_schema.column(column_mapping->ref_column).is_bf_column()) {
*sc_directly = true;
return OLAP_SUCCESS;
} else if (new_tablet_schema.column(i).has_bitmap_index()
!= ref_tablet_schema.column(column_mapping->ref_column).has_bitmap_index()) {
*sc_directly = true;
return OLAP_SUCCESS;
}
}
}
Expand Down
12 changes: 11 additions & 1 deletion be/src/olap/tablet_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1198,7 +1198,17 @@ OLAPStatus TabletManager::_create_inital_rowset_unlocked(
context.tablet_id = tablet->tablet_id();
context.partition_id = tablet->partition_id();
context.tablet_schema_hash = tablet->schema_hash();
context.rowset_type = StorageEngine::instance()->default_rowset_type();
if (!request.__isset.storage_format || request.storage_format == TStorageFormat::DEFAULT) {
context.rowset_type = StorageEngine::instance()->default_rowset_type();
} else if (request.storage_format == TStorageFormat::V1){
context.rowset_type = RowsetTypePB::ALPHA_ROWSET;
} else if (request.storage_format == TStorageFormat::V2) {
context.rowset_type = RowsetTypePB::BETA_ROWSET;
} else {
LOG(ERROR) << "invalid TStorageFormat: " << request.storage_format;
DCHECK(false);
context.rowset_type = StorageEngine::instance()->default_rowset_type();
}
context.rowset_path_prefix = tablet->tablet_path();
context.tablet_schema = &(tablet->tablet_schema());
context.rowset_state = VISIBLE;
Expand Down
13 changes: 13 additions & 0 deletions be/src/olap/tablet_meta.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#include "olap/tablet_meta.h"

#include <sstream>
#include <boost/algorithm/string.hpp>

#include "olap/file_helper.h"
#include "olap/olap_common.h"
Expand Down Expand Up @@ -116,6 +117,7 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id,
uint32_t unique_id = col_ordinal_to_unique_id.at(col_ordinal++);
column->set_unique_id(unique_id);
column->set_name(tcolumn.column_name);
column->set_has_bitmap_index(false);
string data_type;
EnumToString(TPrimitiveType, tcolumn.column_type.type, data_type);
column->set_type(data_type);
Expand Down Expand Up @@ -152,6 +154,17 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id,
column->set_is_bf_column(tcolumn.is_bloom_filter_column);
has_bf_columns = true;
}
if (tablet_schema.__isset.indexes) {
for (auto& index : tablet_schema.indexes) {
if (index.index_type == TIndexType::type::BITMAP) {
DCHECK_EQ(index.columns.size(), 1);
if (boost::iequals(tcolumn.column_name, index.columns[0])) {
column->set_has_bitmap_index(true);
break;
}
}
}
}
}

schema->set_next_column_unique_id(next_unique_id);
Expand Down
8 changes: 8 additions & 0 deletions be/src/olap/tablet_schema.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -295,6 +295,11 @@ OLAPStatus TabletColumn::init_from_pb(const ColumnPB& column) {
} else {
_is_bf_column = false;
}
if (column.has_has_bitmap_index()) {
_has_bitmap_index = column.has_bitmap_index();
} else {
_has_bitmap_index = false;
}
_has_referenced_column = column.has_referenced_column_id();
if (_has_referenced_column) {
_referenced_column_id = column.referenced_column_id();
Expand Down Expand Up @@ -327,6 +332,9 @@ OLAPStatus TabletColumn::to_schema_pb(ColumnPB* column) {
if (_has_referenced_column) {
column->set_referenced_column_id(_referenced_column_id);
}
if (_has_bitmap_index) {
column->set_has_bitmap_index(_has_bitmap_index);
}
return OLAP_SUCCESS;
}

Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/tablet_schema.h
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ class TabletColumn {
inline bool is_key() const { return _is_key; }
inline bool is_nullable() const { return _is_nullable; }
inline bool is_bf_column() const { return _is_bf_column; }
inline bool has_bitmap_index() const {return _has_bitmap_index; }
bool has_default_value() const { return _has_default_value; }
std::string default_value() const { return _default_value; }
bool has_reference_column() const { return _has_referenced_column; }
Expand Down Expand Up @@ -80,6 +81,8 @@ class TabletColumn {
bool _has_referenced_column;
int32_t _referenced_column_id;
std::string _referenced_column;

bool _has_bitmap_index = false;
};

class TabletSchema {
Expand Down
56 changes: 47 additions & 9 deletions be/test/exprs/bitmap_function_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,19 +28,17 @@

namespace doris {

StringVal convert_bitmap_to_string(FunctionContext* ctx, RoaringBitmap& bitmap) {
std::string buf;
buf.resize(bitmap.size());
bitmap.serialize((char*)buf.c_str());
return AnyValUtil::from_string_temp(ctx, buf);
StringVal convert_bitmap_to_string(FunctionContext* ctx,RoaringBitmap& bitmap) {
StringVal result(ctx, bitmap.size());
bitmap.serialize((char*)result.ptr);
return result;
}

template<typename T>
StringVal convert_bitmap_intersect_to_string(FunctionContext* ctx, BitmapIntersect<T>& intersect) {
std::string buf;
buf.resize(intersect.size());
intersect.serialize((char*)buf.c_str());
return AnyValUtil::from_string_temp(ctx, buf);
StringVal result(ctx,intersect.size());
intersect.serialize((char*)result.ptr);
return result;
}

class BitmapFunctionsTest : public testing::Test {
Expand Down Expand Up @@ -248,6 +246,46 @@ TEST_F(BitmapFunctionsTest, test_bitmap_intersect) {
}


TEST_F(BitmapFunctionsTest,bitmap_or) {
RoaringBitmap bitmap1(1024);
bitmap1.update(1);
bitmap1.update(2019);

RoaringBitmap bitmap2(33);
bitmap2.update(44);
bitmap2.update(55);

StringVal bitmap_src = convert_bitmap_to_string(ctx, bitmap1);
StringVal bitmap_dst = convert_bitmap_to_string(ctx, bitmap2);

StringVal bitmap_str = BitmapFunctions::bitmap_or(ctx,bitmap_src,bitmap_dst);
BigIntVal result = BitmapFunctions::bitmap_count(ctx,bitmap_str);

BigIntVal expected(6);
ASSERT_EQ(expected, result);
}


TEST_F(BitmapFunctionsTest,bitmap_and) {
RoaringBitmap bitmap1(1024);
bitmap1.update(1);
bitmap1.update(2019);

RoaringBitmap bitmap2(33);
bitmap2.update(44);
bitmap2.update(2019);

StringVal bitmap_src = convert_bitmap_to_string(ctx, bitmap1);
StringVal bitmap_dst = convert_bitmap_to_string(ctx, bitmap2);

StringVal bitmap_str = BitmapFunctions::bitmap_and(ctx,bitmap_src,bitmap_dst);
BigIntVal result = BitmapFunctions::bitmap_count(ctx,bitmap_str);

BigIntVal expected(1);
ASSERT_EQ(expected, result);
}


}

int main(int argc, char** argv) {
Expand Down
Loading