From 3e7193996c2b3a50d06a2f5db6146e7df11575d5 Mon Sep 17 00:00:00 2001 From: airborne12 Date: Tue, 12 Nov 2024 09:10:42 +0800 Subject: [PATCH] [fix](ngram bloomfilter) fix narrow conversion for ngram bf_size (#43480) Problem Summary: Fix ngram bloomfilter index coredump as below ``` *** SIGFPE integer divide by zero (@0x56294f026472) received by PID 4016941 (TID 4019213 OR 0x7f294eb4d640) from PID 1325556850; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at be/src/common/signal_handler.h:421 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in bin/jdk-17.0.2/lib/server/libjvm.so 2# JVM_handle_linux_signal in bin/jdk-17.0.2/lib/server/libjvm.so 3# 0x00007F3071042520 in /lib/x86_64-linux-gnu/libc.so.6 4# doris::segment_v2::NGramBloomFilter::add_bytes(char const*, unsigned int) at be/src/olap/rowset/segment_v2/ngram_bloom_filter.cpp:61 5# doris::ITokenExtractorHelper::string_to_bloom_filter(char const*, unsigned long, doris::segment_v2::BloomFilter&) const at be/src/olap/itoken_extractor.h:61 6# doris::segment_v2::NGramBloomFilterIndexWriterImpl::add_values(void const*, unsigned long) at be/src/olap/rowset/segment_v2/bloom_filter_index_writer.cpp:250 ``` Co-authored-by: airborne12 --- .../olap/rowset/segment_v2/segment_writer.cpp | 15 +++++- .../segment_v2/vertical_segment_writer.cpp | 15 +++++- .../org/apache/doris/analysis/IndexDef.java | 4 +- .../plans/commands/info/IndexDefinition.java | 4 +- .../test_ngram_bloomfilter_index.groovy | 47 +++++++++++++++++++ 5 files changed, 77 insertions(+), 8 deletions(-) diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 7690a330ae41fc..7c9e608e828f0f 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -201,8 +201,19 @@ Status SegmentWriter::init(const std::vector& col_ids, bool has_key) { if (tablet_index) { opts.need_bloom_filter = true; opts.is_ngram_bf_index = true; - opts.gram_size = tablet_index->get_gram_size(); - opts.gram_bf_size = tablet_index->get_gram_bf_size(); + //narrow convert from int32_t to uint8_t and uint16_t which is dangerous + auto gram_size = tablet_index->get_gram_size(); + auto gram_bf_size = tablet_index->get_gram_bf_size(); + if (gram_size > 256 || gram_size < 1) { + return Status::NotSupported("Do not support ngram bloom filter for ngram_size: ", + gram_size); + } + if (gram_bf_size > 65535 || gram_bf_size < 64) { + return Status::NotSupported("Do not support ngram bloom filter for bf_size: ", + gram_bf_size); + } + opts.gram_size = gram_size; + opts.gram_bf_size = gram_bf_size; } opts.need_bitmap_index = column.has_bitmap_index(); diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 549ad41d2cceaf..2a929f79d507f8 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -156,8 +156,19 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo if (tablet_index) { opts.need_bloom_filter = true; opts.is_ngram_bf_index = true; - opts.gram_size = tablet_index->get_gram_size(); - opts.gram_bf_size = tablet_index->get_gram_bf_size(); + //narrow convert from int32_t to uint8_t and uint16_t which is dangerous + auto gram_size = tablet_index->get_gram_size(); + auto gram_bf_size = tablet_index->get_gram_bf_size(); + if (gram_size > 256 || gram_size < 1) { + return Status::NotSupported("Do not support ngram bloom filter for ngram_size: ", + gram_size); + } + if (gram_bf_size > 65535 || gram_bf_size < 64) { + return Status::NotSupported("Do not support ngram bloom filter for bf_size: ", + gram_bf_size); + } + opts.gram_size = gram_size; + opts.gram_bf_size = gram_bf_size; } opts.need_bitmap_index = column.has_bitmap_index(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java index fc122cc82a55c0..d57ca506786163 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java @@ -258,8 +258,8 @@ public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKe if (ngramSize > 256 || ngramSize < 1) { throw new AnalysisException("gram_size should be integer and less than 256"); } - if (bfSize > 65536 || bfSize < 64) { - throw new AnalysisException("bf_size should be integer and between 64 and 65536"); + if (bfSize > 65535 || bfSize < 64) { + throw new AnalysisException("bf_size should be integer and between 64 and 65535"); } } catch (NumberFormatException e) { throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java index dc2968f53b881c..cd9c19c25d4bd7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java @@ -145,9 +145,9 @@ public void checkColumn(ColumnDefinition column, KeysType keysType, throw new AnalysisException( "gram_size should be integer and less than 256"); } - if (bfSize > 65536 || bfSize < 64) { + if (bfSize > 65535 || bfSize < 64) { throw new AnalysisException( - "bf_size should be integer and between 64 and 65536"); + "bf_size should be integer and between 64 and 65535"); } } catch (NumberFormatException e) { throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e); diff --git a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy index c56eed967a08b0..e2ab9b9c117f1c 100644 --- a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy +++ b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy @@ -59,4 +59,51 @@ suite("test_ngram_bloomfilter_index") { qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url = '/%/7212503657802320699%' ORDER BY key_id" qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN ('/%/7212503657802320699%') ORDER BY key_id" qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like '/%/7212503657802320699%' ORDER BY key_id" + + //case for bf_size 65536 + def tableName2 = 'test_ngram_bloomfilter_index2' + sql "DROP TABLE IF EXISTS ${tableName2}" + test { + sql """ + CREATE TABLE IF NOT EXISTS ${tableName2} ( + `key_id` bigint(20) NULL COMMENT '', + `category` varchar(200) NULL COMMENT '', + `https_url` varchar(300) NULL COMMENT '', + `hostname` varchar(300) NULL, + `http_url` text NULL COMMENT '', + `url_path` varchar(2000) NULL COMMENT '', + `cnt` bigint(20) NULL COMMENT '', + `host_flag` boolean NULL COMMENT '', + INDEX idx_ngrambf (`http_url`) USING NGRAM_BF PROPERTIES("gram_size" = "2", "bf_size" = "65536") + ) ENGINE=OLAP + DUPLICATE KEY(`key_id`, `category`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`key_id`) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + exception "bf_size should be integer and between 64 and 65535" + } + + def tableName3 = 'test_ngram_bloomfilter_index3' + sql "DROP TABLE IF EXISTS ${tableName3}" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName3} ( + `key_id` bigint(20) NULL COMMENT '', + `category` varchar(200) NULL COMMENT '', + `https_url` varchar(300) NULL COMMENT '', + `hostname` varchar(300) NULL, + `http_url` text NULL COMMENT '', + `url_path` varchar(2000) NULL COMMENT '', + `cnt` bigint(20) NULL COMMENT '', + `host_flag` boolean NULL COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`key_id`, `category`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`key_id`) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + test { + sql """ALTER TABLE ${tableName3} ADD INDEX idx_http_url(http_url) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url ngram_bf index'""" + exception "bf_size should be integer and between 64 and 65535" + } }