Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[optimize](zonemap) skip zonemap if predicate does not support_zonemap for branch-2.0-var #27608

Merged
merged 2 commits into from
Nov 27, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions be/src/olap/block_column_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ class BlockColumnPredicate {
// Vectorized evaluation hook over `block` with `size` rows and a `flags` output array.
// The base-class implementation is intentionally a no-op: it touches neither `block`
// nor `flags`.
// NOTE(review): presumably subclasses override this to write per-row results into
// `flags` -- confirm against the overriding classes.
virtual void evaluate_vec(vectorized::MutableColumns& block, uint16_t size, bool* flags) const {
}

// Reports whether this block predicate may be evaluated against zone-map statistics.
// The base-class default is true; subclasses override it to opt out or to forward the
// question to the predicates they wrap.
virtual bool support_zonemap() const { return true; }

virtual bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const {
LOG(FATAL) << "should not reach here";
return true;
Expand Down Expand Up @@ -113,6 +115,7 @@ class SingleColumnBlockPredicate : public BlockColumnPredicate {
uint16_t selected_size) const override;
void evaluate_and(vectorized::MutableColumns& block, uint16_t* sel, uint16_t selected_size,
bool* flags) const override;
// Zone-map support is decided by the wrapped column predicate: forwards to
// _predicate->support_zonemap() and returns its answer unchanged.
bool support_zonemap() const override { return _predicate->support_zonemap(); }
bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const override;
bool evaluate_and(const segment_v2::BloomFilter* bf) const override;
bool evaluate_and(const StringRef* dict_words, const size_t dict_num) const override;
Expand All @@ -139,6 +142,16 @@ class MutilColumnBlockPredicate : public BlockColumnPredicate {
}
}

// Reports whether zone-map evaluation is possible for this composite predicate.
// The answer is true only if every child in _block_column_predicate_vec reports
// support; the scan stops at the first child that does not. With no children the
// result is true.
bool support_zonemap() const override {
    bool every_child_supports = true;
    for (const auto* child : _block_column_predicate_vec) {
        if (child->support_zonemap()) {
            continue;
        }
        // One unsupported child is enough to rule out the whole composite.
        every_child_supports = false;
        break;
    }
    return every_child_supports;
}

// Appends `column_predicate` to the end of _block_column_predicate_vec, making it one
// of the children of this composite predicate.
// NOTE(review): the pointer is stored as-is; who owns and frees it is not visible in
// this hunk -- confirm against the class destructor.
void add_column_predicate(const BlockColumnPredicate* column_predicate) {
_block_column_predicate_vec.push_back(column_predicate);
}
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/column_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,8 @@ class ColumnPredicate {
// OR-style evaluation hook over `column`, given a selection array `sel` of `size`
// entries and a `flags` output array.
// The base-class implementation is an empty body: it reads nothing and leaves `flags`
// untouched.
// NOTE(review): presumably concrete predicates override this to update `flags` --
// confirm against the subclasses.
virtual void evaluate_or(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size,
bool* flags) const {}

// Reports whether this column predicate may be evaluated against zone-map statistics.
// Defaults to true; a predicate type that cannot use zone maps overrides this to
// return false.
virtual bool support_zonemap() const { return true; }

// Statistic-based evaluation against a pair of WrapperField pointers.
// The base-class default ignores `statistic` and always returns true.
// NOTE(review): the pair presumably holds the (min, max) of a zone map, and true
// presumably means "cannot be excluded" -- confirm with the callers.
virtual bool evaluate_and(const std::pair<WrapperField*, WrapperField*>& statistic) const {
return true;
}
Expand Down
2 changes: 2 additions & 0 deletions be/src/olap/match_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ class MatchPredicate : public ColumnPredicate {

// Returns a const reference to the stored _value string; no copy is made.
const std::string& get_value() const { return _value; }

// Match predicates opt out of zone-map evaluation: this override always returns false.
bool support_zonemap() const override { return false; }

//evaluate predicate on Bitmap
virtual Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows,
roaring::Roaring* roaring) const override {
Expand Down
3 changes: 3 additions & 0 deletions be/src/olap/olap_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -336,6 +336,9 @@ struct OlapReaderStatistics {
// the number of rows filtered by various column indexes.
int64_t rows_conditions_filtered = 0;
int64_t block_conditions_filtered_ns = 0;
int64_t block_conditions_filtered_bf_ns = 0;
int64_t block_conditions_filtered_zonemap_ns = 0;
int64_t block_conditions_filtered_dict_ns = 0;

int64_t index_load_ns = 0;

Expand Down
164 changes: 90 additions & 74 deletions be/src/olap/rowset/segment_v2/segment_iterator.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -447,90 +447,106 @@ Status SegmentIterator::_get_row_ranges_from_conditions(RowRanges* condition_row
cids.insert(entry.first);
}

// first filter data by bloom filter index
// bloom filter index only use CondColumn
RowRanges bf_row_ranges = RowRanges::create_single(num_rows());
for (auto& cid : cids) {
if (!_segment->is_same_file_col_type_with_expected(
cid, *_schema, _opts.io_ctx.reader_type != ReaderType::READER_QUERY)) {
continue;
}
// get row ranges by bf index of this column,
RowRanges column_bf_row_ranges = RowRanges::create_single(num_rows());
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
RETURN_IF_ERROR(_column_iterators[cid]->get_row_ranges_by_bloom_filter(
_opts.col_id_to_predicates.at(cid).get(), &column_bf_row_ranges));
RowRanges::ranges_intersection(bf_row_ranges, column_bf_row_ranges, &bf_row_ranges);
}
size_t pre_size = 0;

size_t pre_size = condition_row_ranges->count();
RowRanges::ranges_intersection(*condition_row_ranges, bf_row_ranges, condition_row_ranges);
_opts.stats->rows_bf_filtered += (pre_size - condition_row_ranges->count());

RowRanges zone_map_row_ranges = RowRanges::create_single(num_rows());
// second filter data by zone map
for (auto& cid : cids) {
if (!_segment->is_same_file_col_type_with_expected(
cid, *_schema, _opts.io_ctx.reader_type != ReaderType::READER_QUERY)) {
continue;
{
SCOPED_RAW_TIMER(&_opts.stats->block_conditions_filtered_bf_ns);
// first filter data by bloom filter index
// bloom filter index only use CondColumn
RowRanges bf_row_ranges = RowRanges::create_single(num_rows());
for (auto& cid : cids) {
if (!_segment->is_same_file_col_type_with_expected(
cid, *_schema, _opts.io_ctx.reader_type != ReaderType::READER_QUERY)) {
continue;
}
// get row ranges by bf index of this column,
RowRanges column_bf_row_ranges = RowRanges::create_single(num_rows());
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
RETURN_IF_ERROR(_column_iterators[cid]->get_row_ranges_by_bloom_filter(
_opts.col_id_to_predicates.at(cid).get(), &column_bf_row_ranges));
RowRanges::ranges_intersection(bf_row_ranges, column_bf_row_ranges, &bf_row_ranges);
}
// get row ranges by zone map of this column,
RowRanges column_row_ranges = RowRanges::create_single(num_rows());
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
RETURN_IF_ERROR(_column_iterators[cid]->get_row_ranges_by_zone_map(
_opts.col_id_to_predicates.at(cid).get(),
_opts.del_predicates_for_zone_map.count(cid) > 0
? &(_opts.del_predicates_for_zone_map.at(cid))
: nullptr,
&column_row_ranges));
// intersect different columns's row ranges to get final row ranges by zone map
RowRanges::ranges_intersection(zone_map_row_ranges, column_row_ranges,
&zone_map_row_ranges);
}

std::shared_ptr<doris::ColumnPredicate> runtime_predicate = nullptr;
if (_opts.use_topn_opt) {
auto query_ctx = _opts.runtime_state->get_query_ctx();
runtime_predicate = query_ctx->get_runtime_predicate().get_predictate();
if (runtime_predicate && _segment->is_same_file_col_type_with_expected(
runtime_predicate->column_id(), *_schema,
_opts.io_ctx.reader_type != ReaderType::READER_QUERY)) {
AndBlockColumnPredicate and_predicate;
auto single_predicate = new SingleColumnBlockPredicate(runtime_predicate.get());
and_predicate.add_column_predicate(single_predicate);

RowRanges column_rp_row_ranges = RowRanges::create_single(num_rows());
RETURN_IF_ERROR(
_column_iterators[runtime_predicate->column_id()]->get_row_ranges_by_zone_map(
&and_predicate, nullptr, &column_rp_row_ranges));
pre_size = condition_row_ranges->count();
RowRanges::ranges_intersection(*condition_row_ranges, bf_row_ranges, condition_row_ranges);
_opts.stats->rows_bf_filtered += (pre_size - condition_row_ranges->count());
}

{
SCOPED_RAW_TIMER(&_opts.stats->block_conditions_filtered_zonemap_ns);
RowRanges zone_map_row_ranges = RowRanges::create_single(num_rows());
// second filter data by zone map
for (auto& cid : cids) {
if (!_segment->is_same_file_col_type_with_expected(
cid, *_schema, _opts.io_ctx.reader_type != ReaderType::READER_QUERY)) {
continue;
}
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
// do not check zonemap if predicate does not support zonemap
if (!_opts.col_id_to_predicates.at(cid)->support_zonemap()) {
LOG(WARNING) << "skip zonemap for column " << cid;
continue;
}
// get row ranges by zone map of this column,
RowRanges column_row_ranges = RowRanges::create_single(num_rows());
RETURN_IF_ERROR(_column_iterators[cid]->get_row_ranges_by_zone_map(
_opts.col_id_to_predicates.at(cid).get(),
_opts.del_predicates_for_zone_map.count(cid) > 0
? &(_opts.del_predicates_for_zone_map.at(cid))
: nullptr,
&column_row_ranges));
// intersect different columns's row ranges to get final row ranges by zone map
RowRanges::ranges_intersection(zone_map_row_ranges, column_rp_row_ranges,
&zone_map_row_ranges);
RowRanges::ranges_intersection(zone_map_row_ranges, column_row_ranges,
&zone_map_row_ranges);
}

std::shared_ptr<doris::ColumnPredicate> runtime_predicate = nullptr;
if (_opts.use_topn_opt) {
auto query_ctx = _opts.runtime_state->get_query_ctx();
runtime_predicate = query_ctx->get_runtime_predicate().get_predictate();
if (runtime_predicate && _segment->is_same_file_col_type_with_expected(
runtime_predicate->column_id(), *_schema,
_opts.io_ctx.reader_type != ReaderType::READER_QUERY)) {
AndBlockColumnPredicate and_predicate;
auto single_predicate = new SingleColumnBlockPredicate(runtime_predicate.get());
and_predicate.add_column_predicate(single_predicate);

RowRanges column_rp_row_ranges = RowRanges::create_single(num_rows());
RETURN_IF_ERROR(
_column_iterators[runtime_predicate->column_id()]->get_row_ranges_by_zone_map(
&and_predicate, nullptr, &column_rp_row_ranges));

// intersect different columns's row ranges to get final row ranges by zone map
RowRanges::ranges_intersection(zone_map_row_ranges, column_rp_row_ranges,
&zone_map_row_ranges);
}
}

pre_size = condition_row_ranges->count();
RowRanges::ranges_intersection(*condition_row_ranges, zone_map_row_ranges,
condition_row_ranges);
_opts.stats->rows_stats_filtered += (pre_size - condition_row_ranges->count());
}

pre_size = condition_row_ranges->count();
RowRanges::ranges_intersection(*condition_row_ranges, zone_map_row_ranges,
condition_row_ranges);
_opts.stats->rows_stats_filtered += (pre_size - condition_row_ranges->count());
{
SCOPED_RAW_TIMER(&_opts.stats->block_conditions_filtered_dict_ns);
/// Low cardinality optimization is currently not very stable, so to prevent data corruption,
/// we are temporarily disabling its use in data compaction.
if (_opts.io_ctx.reader_type == ReaderType::READER_QUERY) {
RowRanges dict_row_ranges = RowRanges::create_single(num_rows());
for (auto cid : cids) {
RowRanges tmp_row_ranges = RowRanges::create_single(num_rows());
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
RETURN_IF_ERROR(_column_iterators[cid]->get_row_ranges_by_dict(
_opts.col_id_to_predicates.at(cid).get(), &tmp_row_ranges));
RowRanges::ranges_intersection(dict_row_ranges, tmp_row_ranges, &dict_row_ranges);
}

/// Low cardinality optimization is currently not very stable, so to prevent data corruption,
/// we are temporarily disabling its use in data compaction.
if (_opts.io_ctx.reader_type == ReaderType::READER_QUERY) {
RowRanges dict_row_ranges = RowRanges::create_single(num_rows());
for (auto cid : cids) {
RowRanges tmp_row_ranges = RowRanges::create_single(num_rows());
DCHECK(_opts.col_id_to_predicates.count(cid) > 0);
RETURN_IF_ERROR(_column_iterators[cid]->get_row_ranges_by_dict(
_opts.col_id_to_predicates.at(cid).get(), &tmp_row_ranges));
RowRanges::ranges_intersection(dict_row_ranges, tmp_row_ranges, &dict_row_ranges);
pre_size = condition_row_ranges->count();
RowRanges::ranges_intersection(*condition_row_ranges, dict_row_ranges,
condition_row_ranges);
_opts.stats->rows_dict_filtered += (pre_size - condition_row_ranges->count());
}

pre_size = condition_row_ranges->count();
RowRanges::ranges_intersection(*condition_row_ranges, dict_row_ranges,
condition_row_ranges);
_opts.stats->rows_dict_filtered += (pre_size - condition_row_ranges->count());
}

return Status::OK();
Expand Down
3 changes: 3 additions & 0 deletions be/src/vec/exec/scan/new_olap_scan_node.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,9 @@ Status NewOlapScanNode::_init_profile() {
_block_init_seek_timer = ADD_TIMER(_segment_profile, "BlockInitSeekTime");
_block_init_seek_counter = ADD_COUNTER(_segment_profile, "BlockInitSeekCount", TUnit::UNIT);
_block_conditions_filtered_timer = ADD_TIMER(_segment_profile, "BlockConditionsFilteredTime");
_block_conditions_filtered_bf_timer = ADD_TIMER(_segment_profile, "BlockConditionsFilteredBloomFilterTime");
_block_conditions_filtered_zonemap_timer = ADD_TIMER(_segment_profile, "BlockConditionsFilteredZonemapTime");
_block_conditions_filtered_dict_timer = ADD_TIMER(_segment_profile, "BlockConditionsFilteredDictTime");

_rows_vec_cond_filtered_counter =
ADD_COUNTER(_segment_profile, "RowsVectorPredFiltered", TUnit::UNIT);
Expand Down
3 changes: 3 additions & 0 deletions be/src/vec/exec/scan/new_olap_scan_node.h
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,9 @@ class NewOlapScanNode : public VScanNode {
RuntimeProfile::Counter* _block_init_seek_timer = nullptr;
RuntimeProfile::Counter* _block_init_seek_counter = nullptr;
RuntimeProfile::Counter* _block_conditions_filtered_timer = nullptr;
RuntimeProfile::Counter* _block_conditions_filtered_bf_timer = nullptr;
RuntimeProfile::Counter* _block_conditions_filtered_zonemap_timer = nullptr;
RuntimeProfile::Counter* _block_conditions_filtered_dict_timer = nullptr;
RuntimeProfile::Counter* _first_read_timer = nullptr;
RuntimeProfile::Counter* _second_read_timer = nullptr;
RuntimeProfile::Counter* _first_read_seek_timer = nullptr;
Expand Down
6 changes: 6 additions & 0 deletions be/src/vec/exec/scan/new_olap_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -588,6 +588,12 @@ void NewOlapScanner::_update_counters_before_close() {
COUNTER_UPDATE(olap_parent->_block_init_seek_counter, stats.block_init_seek_num);
COUNTER_UPDATE(olap_parent->_block_conditions_filtered_timer,
stats.block_conditions_filtered_ns);
COUNTER_UPDATE(olap_parent->_block_conditions_filtered_bf_timer,
stats.block_conditions_filtered_bf_ns);
COUNTER_UPDATE(olap_parent->_block_conditions_filtered_zonemap_timer,
stats.block_conditions_filtered_zonemap_ns);
COUNTER_UPDATE(olap_parent->_block_conditions_filtered_dict_timer,
stats.block_conditions_filtered_dict_ns);
COUNTER_UPDATE(olap_parent->_first_read_timer, stats.first_read_ns);
COUNTER_UPDATE(olap_parent->_second_read_timer, stats.second_read_ns);
COUNTER_UPDATE(olap_parent->_first_read_seek_timer, stats.block_first_read_seek_ns);
Expand Down