Skip to content

Commit

Permalink
[fix](ngram bloomfilter) fix narrow conversion for ngram bf_size (#43480
Browse files Browse the repository at this point in the history
)

### What problem does this PR solve?

Problem Summary:
Fix ngram bloomfilter index coredump as below
```
*** SIGFPE integer divide by zero (@0x56294f026472) received by PID 4016941 (TID 4019213 OR 0x7f294eb4d640) from PID 1325556850; stack trace: ***
 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at be/src/common/signal_handler.h:421
 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in bin/jdk-17.0.2/lib/server/libjvm.so
 2# JVM_handle_linux_signal in bin/jdk-17.0.2/lib/server/libjvm.so
 3# 0x00007F3071042520 in /lib/x86_64-linux-gnu/libc.so.6
 4# doris::segment_v2::NGramBloomFilter::add_bytes(char const*, unsigned int) at be/src/olap/rowset/segment_v2/ngram_bloom_filter.cpp:61
 5# doris::ITokenExtractorHelper<doris::NgramTokenExtractor>::string_to_bloom_filter(char const*, unsigned long, doris::segment_v2::BloomFilter&) const at be/src/olap/itoken_extractor.h:61
 6# doris::segment_v2::NGramBloomFilterIndexWriterImpl::add_values(void const*, unsigned long) at be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp:250
```

Co-authored-by: airborne12 <jiangkai@selectdb.com>
  • Loading branch information
airborne12 and airborne12 authored Nov 12, 2024
1 parent 88a6268 commit 2218e09
Show file tree
Hide file tree
Showing 5 changed files with 77 additions and 8 deletions.
15 changes: 13 additions & 2 deletions be/src/olap/rowset/segment_v2/segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,8 +192,19 @@ Status SegmentWriter::_create_column_writer(uint32_t cid, const TabletColumn& co
if (tablet_index) {
opts.need_bloom_filter = true;
opts.is_ngram_bf_index = true;
opts.gram_size = tablet_index->get_gram_size();
opts.gram_bf_size = tablet_index->get_gram_bf_size();
//narrow convert from int32_t to uint8_t and uint16_t which is dangerous
auto gram_size = tablet_index->get_gram_size();
auto gram_bf_size = tablet_index->get_gram_bf_size();
if (gram_size > 256 || gram_size < 1) {
return Status::NotSupported("Do not support ngram bloom filter for ngram_size: ",
gram_size);
}
if (gram_bf_size > 65535 || gram_bf_size < 64) {
return Status::NotSupported("Do not support ngram bloom filter for bf_size: ",
gram_bf_size);
}
opts.gram_size = gram_size;
opts.gram_bf_size = gram_bf_size;
}

opts.need_bitmap_index = column.has_bitmap_index();
Expand Down
15 changes: 13 additions & 2 deletions be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -185,8 +185,19 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo
if (tablet_index) {
opts.need_bloom_filter = true;
opts.is_ngram_bf_index = true;
opts.gram_size = tablet_index->get_gram_size();
opts.gram_bf_size = tablet_index->get_gram_bf_size();
//narrow convert from int32_t to uint8_t and uint16_t which is dangerous
auto gram_size = tablet_index->get_gram_size();
auto gram_bf_size = tablet_index->get_gram_bf_size();
if (gram_size > 256 || gram_size < 1) {
return Status::NotSupported("Do not support ngram bloom filter for ngram_size: ",
gram_size);
}
if (gram_bf_size > 65535 || gram_bf_size < 64) {
return Status::NotSupported("Do not support ngram bloom filter for bf_size: ",
gram_bf_size);
}
opts.gram_size = gram_size;
opts.gram_bf_size = gram_bf_size;
}

opts.need_bitmap_index = column.has_bitmap_index();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,8 @@ public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKe
if (ngramSize > 256 || ngramSize < 1) {
throw new AnalysisException("gram_size should be integer and less than 256");
}
if (bfSize > 65536 || bfSize < 64) {
throw new AnalysisException("bf_size should be integer and between 64 and 65536");
if (bfSize > 65535 || bfSize < 64) {
throw new AnalysisException("bf_size should be integer and between 64 and 65535");
}
} catch (NumberFormatException e) {
throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -141,9 +141,9 @@ public void checkColumn(ColumnDefinition column, KeysType keysType,
throw new AnalysisException(
"gram_size should be integer and less than 256");
}
if (bfSize > 65536 || bfSize < 64) {
if (bfSize > 65535 || bfSize < 64) {
throw new AnalysisException(
"bf_size should be integer and between 64 and 65536");
"bf_size should be integer and between 64 and 65535");
}
} catch (NumberFormatException e) {
throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -59,4 +59,51 @@ suite("test_ngram_bloomfilter_index") {
qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url = '/%/7212503657802320699%' ORDER BY key_id"
qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN ('/%/7212503657802320699%') ORDER BY key_id"
qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like '/%/7212503657802320699%' ORDER BY key_id"

//case for bf_size 65536
def tableName2 = 'test_ngram_bloomfilter_index2'
sql "DROP TABLE IF EXISTS ${tableName2}"
test {
sql """
CREATE TABLE IF NOT EXISTS ${tableName2} (
`key_id` bigint(20) NULL COMMENT '',
`category` varchar(200) NULL COMMENT '',
`https_url` varchar(300) NULL COMMENT '',
`hostname` varchar(300) NULL,
`http_url` text NULL COMMENT '',
`url_path` varchar(2000) NULL COMMENT '',
`cnt` bigint(20) NULL COMMENT '',
`host_flag` boolean NULL COMMENT '',
INDEX idx_ngrambf (`http_url`) USING NGRAM_BF PROPERTIES("gram_size" = "2", "bf_size" = "65536")
) ENGINE=OLAP
DUPLICATE KEY(`key_id`, `category`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
PROPERTIES("replication_num" = "1");
"""
exception "bf_size should be integer and between 64 and 65535"
}

def tableName3 = 'test_ngram_bloomfilter_index3'
sql "DROP TABLE IF EXISTS ${tableName3}"
sql """
CREATE TABLE IF NOT EXISTS ${tableName3} (
`key_id` bigint(20) NULL COMMENT '',
`category` varchar(200) NULL COMMENT '',
`https_url` varchar(300) NULL COMMENT '',
`hostname` varchar(300) NULL,
`http_url` text NULL COMMENT '',
`url_path` varchar(2000) NULL COMMENT '',
`cnt` bigint(20) NULL COMMENT '',
`host_flag` boolean NULL COMMENT ''
) ENGINE=OLAP
DUPLICATE KEY(`key_id`, `category`)
COMMENT 'OLAP'
DISTRIBUTED BY HASH(`key_id`) BUCKETS 3
PROPERTIES("replication_num" = "1");
"""
test {
sql """ALTER TABLE ${tableName3} ADD INDEX idx_http_url(http_url) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url ngram_bf index'"""
exception "bf_size should be integer and between 64 and 65535"
}
}

0 comments on commit 2218e09

Please sign in to comment.