Skip to content

Commit

Permalink
[fix](bloomfilter) fix inlist support for date/datetimev1 in bloomfil…
Browse files Browse the repository at this point in the history
…ter index (apache#46961)

Problem Summary:
apache#43351 fixed date/datetime v1 support in the comparison predicate; this PR
applies the same fix to the in-list predicate.
  • Loading branch information
airborne12 authored Jan 15, 2025
1 parent cc183eb commit a0f4c4f
Show file tree
Hide file tree
Showing 4 changed files with 213 additions and 0 deletions.
17 changes: 17 additions & 0 deletions be/src/olap/in_list_predicate.h
Original file line number Diff line number Diff line change
Expand Up @@ -322,6 +322,23 @@ class InListPredicateBase : public ColumnPredicate {
sizeof(decimal12_t))) {
return true;
}
} else if constexpr (Type == PrimitiveType::TYPE_DATE) {
const T* value = (const T*)(iter->get_value());
uint24_t date_value(value->to_olap_date());
if (bf->test_bytes(
const_cast<char*>(reinterpret_cast<const char*>(&date_value)),
sizeof(uint24_t))) {
return true;
}
// DatetimeV1 using int64_t in bloom filter
} else if constexpr (Type == PrimitiveType::TYPE_DATETIME) {
const T* value = (const T*)(iter->get_value());
int64_t datetime_value(value->to_olap_datetime());
if (bf->test_bytes(
const_cast<char*>(reinterpret_cast<const char*>(&datetime_value)),
sizeof(int64_t))) {
return true;
}
} else {
const T* value = (const T*)(iter->get_value());
if (bf->test_bytes(reinterpret_cast<const char*>(value), sizeof(*value))) {
Expand Down
140 changes: 140 additions & 0 deletions be/test/olap/date_bloom_filter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
#include <gtest/gtest.h>

#include "olap/comparison_predicate.h"
#include "olap/in_list_predicate.h"
#include "olap/rowset/beta_rowset.h"
#include "olap/rowset/beta_rowset_writer.h"
#include "olap/rowset/rowset_factory.h"
#include "olap/rowset/segment_v2/bloom_filter_index_reader.h"
#include "olap/storage_engine.h"
#include "runtime/define_primitive_type.h"
#include "util/date_func.h"
#include "vec/runtime/vdatetime_value.h"

Expand Down Expand Up @@ -189,4 +191,142 @@ TEST_F(DateBloomFilterTest, query_index_test) {
test("2024-11-20 09:00:00", false);
}
}

// Checks IN-list predicate evaluation against the bloom filter index of a DATE
// column (index 0) and a DATETIME column (index 1).
//
// Two rows are written (2024-11-08 and 2024-11-09, both at 09:00:00 for the
// DATETIME column). An IN-list must be reported as a possible match when at
// least one of its values was written, and as a non-match when none of them was.
//
// Setup steps whose result is dereferenced afterwards use ASSERT_* so that a
// failure stops the test instead of dereferencing an empty result/null pointer.
TEST_F(DateBloomFilterTest, in_list_predicate_test) {
    // Start from an empty tablet directory.
    EXPECT_TRUE(io::global_local_filesystem()->delete_directory(_tablet->tablet_path()).ok());
    EXPECT_TRUE(io::global_local_filesystem()->create_directory(_tablet->tablet_path()).ok());

    RowsetSharedPtr rowset;
    const auto& res =
            RowsetFactory::create_rowset_writer(*_engine_ref, rowset_writer_context(), false);
    // Fatal on failure: res.value() below must not be touched without a value.
    ASSERT_TRUE(res.has_value()) << res.error();
    const auto& rowset_writer = res.value();

    Block block = _tablet_schema->create_block();
    auto columns = block.mutate_columns();

    // Appends one row: the DATE value goes to column 0 and the DATETIME value to
    // column 1, each converted with to_olap_date() / to_olap_datetime() first.
    auto append_row = [&](const std::string& date_str, const std::string& datetime_str) {
        auto date = timestamp_from_date(date_str);
        auto datetime = timestamp_from_datetime(datetime_str);
        uint24_t olap_date_value(date.to_olap_date());
        uint64_t olap_datetime_value(datetime.to_olap_datetime());
        columns[0]->insert_many_fix_len_data(reinterpret_cast<const char*>(&olap_date_value), 1);
        columns[1]->insert_many_fix_len_data(reinterpret_cast<const char*>(&olap_datetime_value),
                                             1);
    };

    // Insert test data
    append_row("2024-11-08", "2024-11-08 09:00:00");
    append_row("2024-11-09", "2024-11-09 09:00:00");

    EXPECT_TRUE(rowset_writer->add_block(&block).ok());
    EXPECT_TRUE(rowset_writer->flush().ok());
    // Fatal on failure: `rowset` is dereferenced right after.
    ASSERT_TRUE(rowset_writer->build(rowset).ok());
    EXPECT_TRUE(_tablet->add_rowset(rowset).ok());

    segment_v2::SegmentSharedPtr segment;
    ASSERT_TRUE(static_cast<BetaRowset*>(rowset.get())->load_segment(0, &segment).ok());
    auto st = segment->_create_column_readers(*(segment->_footer_pb));
    ASSERT_TRUE(st.ok());

    // Test DATE column with IN predicate
    {
        const auto& reader = segment->_column_readers[0];
        std::unique_ptr<BloomFilterIndexIterator> bf_iter;
        ASSERT_TRUE(reader->_bloom_filter_index->load(true, true, nullptr).ok());
        ASSERT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter, nullptr).ok());
        std::unique_ptr<BloomFilter> bf;
        ASSERT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok());

        using DateSet = HybridSet<PrimitiveType::TYPE_DATE>;
        using DateInList = InListPredicateBase<TYPE_DATE, PredicateType::IN_LIST, DateSet>;

        // Builds an IN-list predicate over `values` and checks its verdict
        // against the bloom filter read above.
        auto expect_in_list = [&](const std::vector<std::string>& values, bool expected) {
            auto hybrid_set = std::make_shared<DateSet>();
            for (const auto& value : values) {
                auto v = timestamp_from_date(value);
                hybrid_set->insert(&v);
            }
            DateInList date_pred(0, hybrid_set);
            EXPECT_EQ(date_pred.evaluate_and(bf.get()), expected);
        };

        // Test positive cases: every value was written.
        expect_in_list({"2024-11-08", "2024-11-09"}, true);
        expect_in_list({"2024-11-08"}, true);
        expect_in_list({"2024-11-09"}, true);

        // Test negative cases: a list matches only if it contains a written value.
        expect_in_list({"2024-11-20"}, false);
        expect_in_list({"2024-11-08", "2024-11-20"}, true);
        expect_in_list({"2024-11-20", "2024-11-21"}, false);
    }

    // Test DATETIME column with IN predicate
    {
        const auto& reader = segment->_column_readers[1];
        std::unique_ptr<BloomFilterIndexIterator> bf_iter;
        ASSERT_TRUE(reader->_bloom_filter_index->load(true, true, nullptr).ok());
        ASSERT_TRUE(reader->_bloom_filter_index->new_iterator(&bf_iter, nullptr).ok());
        std::unique_ptr<BloomFilter> bf;
        ASSERT_TRUE(bf_iter->read_bloom_filter(0, &bf).ok());

        using DatetimeSet = HybridSet<PrimitiveType::TYPE_DATETIME>;
        using DatetimeInList =
                InListPredicateBase<TYPE_DATETIME, PredicateType::IN_LIST, DatetimeSet>;

        // Same check as for the DATE column, with DATETIME values.
        auto expect_in_list = [&](const std::vector<std::string>& values, bool expected) {
            auto hybrid_set = std::make_shared<DatetimeSet>();
            for (const auto& value : values) {
                auto v = timestamp_from_datetime(value);
                hybrid_set->insert(&v);
            }
            DatetimeInList datetime_pred(0, hybrid_set);
            EXPECT_EQ(datetime_pred.evaluate_and(bf.get()), expected);
        };

        // Test positive cases: every value was written.
        expect_in_list({"2024-11-08 09:00:00", "2024-11-09 09:00:00"}, true);
        expect_in_list({"2024-11-08 09:00:00"}, true);
        expect_in_list({"2024-11-09 09:00:00"}, true);

        // Test negative cases: a list matches only if it contains a written value.
        expect_in_list({"2024-11-20 09:00:00"}, false);
        expect_in_list({"2024-11-08 09:00:00", "2024-11-20 09:00:00"}, true);
        expect_in_list({"2024-11-20 09:00:00", "2024-11-21 09:00:00"}, false);
    }
}

} // namespace doris
21 changes: 21 additions & 0 deletions regression-test/data/bloom_filter_p0/test_bloom_filter.out
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !select_datetime_v1 --
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33

-- !select_datetime_v2 --
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33

-- !select_date_v1 --
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33

-- !select_date_v2 --
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
1 1 1 2024-12-17T20:00 2024-12-17T20:00 2024-12-17 2024-12-17 3.32 3.32
2 2 2 2024-12-18T20:00 2024-12-18T20:00 2024-12-18 2024-12-18 3.33 3.33

35 changes: 35 additions & 0 deletions regression-test/suites/bloom_filter_p0/test_bloom_filter.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -148,4 +148,39 @@ suite("test_bloom_filter") {
sql """ALTER TABLE ${test_json_tb} SET("bloom_filter_columns" = "k1,j1")"""
exception "not supported in bloom filter index"
}

// bloom filter index for datetime/date/decimal columns, queried with IN-list predicates
def test_datetime_tb = "test_datetime_bloom_filter_tb"
sql """DROP TABLE IF EXISTS ${test_datetime_tb}"""
// The FE switches are flipped for the duration of this case and set back to 'true'
// in the finally block, so a failing statement cannot leave them changed for
// whatever runs next.
try {
    // NOTE(review): presumably required so the DATETIMEV1/DATEv1/decimalv2 columns
    // below can be created - confirm against the FE config documentation.
    sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'false')"""
    sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'false')"""
    sql """CREATE TABLE IF NOT EXISTS ${test_datetime_tb} (
a int,
b int,
c int,
d DATETIMEV1,
d2 DATETIMEV2,
da DATEv1,
dav2 DATEV2,
dec decimal(10,2),
dec2 decimalv2(10,2)
) ENGINE=OLAP
DUPLICATE KEY(a)
DISTRIBUTED BY HASH(a) BUCKETS 5
PROPERTIES (
"replication_num" = "1"
)"""
    // Rows 1-3 are expected in the IN-list results; the 2024-12-22 row must be filtered out.
    sql """INSERT INTO ${test_datetime_tb} VALUES
(1,1,1,"2024-12-17 20:00:00", "2024-12-17 20:00:00", "2024-12-17", "2024-12-17", "3.32", "3.32"),
(1,1,1,"2024-12-17 20:00:00", "2024-12-17 20:00:00", "2024-12-17", "2024-12-17", "3.32", "3.32"),
(2,2,2,"2024-12-18 20:00:00", "2024-12-18 20:00:00", "2024-12-18", "2024-12-18", "3.33", "3.33"),
(3,3,3,"2024-12-22 20:00:00", "2024-12-22 20:00:00", "2024-12-22", "2024-12-22", "4.33", "4.33")"""
    sql """ALTER TABLE ${test_datetime_tb} SET ("bloom_filter_columns" = "d,d2,da,dav2,dec,dec2")"""
    // NOTE(review): fixed 3s pause after the ALTER; presumably gives the bloom filter
    // change time to take effect - a wait on the job state would be less timing-dependent.
    Thread.sleep(3000)
    qt_select_datetime_v1 """SELECT * FROM ${test_datetime_tb} WHERE d IN ("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
    qt_select_datetime_v2 """SELECT * FROM ${test_datetime_tb} WHERE d2 IN ("2024-12-17 20:00:00", "2024-12-18 20:00:00") order by a"""
    qt_select_date_v1 """SELECT * FROM ${test_datetime_tb} WHERE da IN ("2024-12-17", "2024-12-18") order by a"""
    qt_select_date_v2 """SELECT * FROM ${test_datetime_tb} WHERE dav2 IN ("2024-12-17", "2024-12-18") order by a"""
} finally {
    sql """ADMIN SET FRONTEND CONFIG ('disable_decimalv2' = 'true')"""
    sql """ADMIN SET FRONTEND CONFIG ('disable_datev1' = 'true')"""
}
}

0 comments on commit a0f4c4f

Please sign in to comment.