diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 7690a330ae41fc..7c9e608e828f0f 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -201,8 +201,19 @@ Status SegmentWriter::init(const std::vector& col_ids, bool has_key) { if (tablet_index) { opts.need_bloom_filter = true; opts.is_ngram_bf_index = true; - opts.gram_size = tablet_index->get_gram_size(); - opts.gram_bf_size = tablet_index->get_gram_bf_size(); + //narrow convert from int32_t to uint8_t and uint16_t which is dangerous + auto gram_size = tablet_index->get_gram_size(); + auto gram_bf_size = tablet_index->get_gram_bf_size(); + if (gram_size > 256 || gram_size < 1) { + return Status::NotSupported("Do not support ngram bloom filter for ngram_size: ", + gram_size); + } + if (gram_bf_size > 65535 || gram_bf_size < 64) { + return Status::NotSupported("Do not support ngram bloom filter for bf_size: ", + gram_bf_size); + } + opts.gram_size = gram_size; + opts.gram_bf_size = gram_bf_size; } opts.need_bitmap_index = column.has_bitmap_index(); diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index 549ad41d2cceaf..2a929f79d507f8 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -156,8 +156,19 @@ Status VerticalSegmentWriter::_create_column_writer(uint32_t cid, const TabletCo if (tablet_index) { opts.need_bloom_filter = true; opts.is_ngram_bf_index = true; - opts.gram_size = tablet_index->get_gram_size(); - opts.gram_bf_size = tablet_index->get_gram_bf_size(); + //narrow convert from int32_t to uint8_t and uint16_t which is dangerous + auto gram_size = tablet_index->get_gram_size(); + auto gram_bf_size = tablet_index->get_gram_bf_size(); + if (gram_size > 256 || gram_size < 1) { + return Status::NotSupported("Do not support ngram bloom filter for ngram_size: ", + gram_size); + } + if (gram_bf_size > 65535 || gram_bf_size < 64) { + return Status::NotSupported("Do not support ngram bloom filter for bf_size: ", + gram_bf_size); + } + opts.gram_size = gram_size; + opts.gram_bf_size = gram_bf_size; } opts.need_bitmap_index = column.has_bitmap_index(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java index fc122cc82a55c0..d57ca506786163 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/IndexDef.java @@ -258,8 +258,8 @@ public void checkColumn(Column column, KeysType keysType, boolean enableUniqueKe if (ngramSize > 256 || ngramSize < 1) { throw new AnalysisException("gram_size should be integer and less than 256"); } - if (bfSize > 65536 || bfSize < 64) { - throw new AnalysisException("bf_size should be integer and between 64 and 65536"); + if (bfSize > 65535 || bfSize < 64) { + throw new AnalysisException("bf_size should be integer and between 64 and 65535"); } } catch (NumberFormatException e) { throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java index dc2968f53b881c..cd9c19c25d4bd7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/IndexDefinition.java @@ -145,9 +145,9 @@ public void checkColumn(ColumnDefinition column, KeysType keysType, throw new AnalysisException( "gram_size should be integer and less than 256"); } - if (bfSize > 65536 || bfSize < 64) { + if (bfSize > 65535 || bfSize < 64) { throw new AnalysisException( - "bf_size should be integer and between 64 and 65536"); + "bf_size should be integer and between 64 and 65535"); } } catch (NumberFormatException e) { throw new AnalysisException("invalid ngram properties:" + e.getMessage(), e); diff --git a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy index c56eed967a08b0..e2ab9b9c117f1c 100644 --- a/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy +++ b/regression-test/suites/index_p0/test_ngram_bloomfilter_index.groovy @@ -59,4 +59,51 @@ suite("test_ngram_bloomfilter_index") { qt_select_eq_3 "SELECT * FROM ${tableName} WHERE http_url = '/%/7212503657802320699%' ORDER BY key_id" qt_select_in_3 "SELECT * FROM ${tableName} WHERE http_url IN ('/%/7212503657802320699%') ORDER BY key_id" qt_select_like_3 "SELECT * FROM ${tableName} WHERE http_url like '/%/7212503657802320699%' ORDER BY key_id" + + //case for bf_size 65536 + def tableName2 = 'test_ngram_bloomfilter_index2' + sql "DROP TABLE IF EXISTS ${tableName2}" + test { + sql """ + CREATE TABLE IF NOT EXISTS ${tableName2} ( + `key_id` bigint(20) NULL COMMENT '', + `category` varchar(200) NULL COMMENT '', + `https_url` varchar(300) NULL COMMENT '', + `hostname` varchar(300) NULL, + `http_url` text NULL COMMENT '', + `url_path` varchar(2000) NULL COMMENT '', + `cnt` bigint(20) NULL COMMENT '', + `host_flag` boolean NULL COMMENT '', + INDEX idx_ngrambf (`http_url`) USING NGRAM_BF PROPERTIES("gram_size" = "2", "bf_size" = "65536") + ) ENGINE=OLAP + DUPLICATE KEY(`key_id`, `category`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`key_id`) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + exception "bf_size should be integer and between 64 and 65535" + } + + def tableName3 = 'test_ngram_bloomfilter_index3' + sql "DROP TABLE IF EXISTS ${tableName3}" + sql """ + CREATE TABLE IF NOT EXISTS ${tableName3} ( + `key_id` bigint(20) NULL COMMENT '', + `category` varchar(200) NULL COMMENT '', + `https_url` varchar(300) NULL COMMENT '', + `hostname` varchar(300) NULL, + `http_url` text NULL COMMENT '', + `url_path` varchar(2000) NULL COMMENT '', + `cnt` bigint(20) NULL COMMENT '', + `host_flag` boolean NULL COMMENT '' + ) ENGINE=OLAP + DUPLICATE KEY(`key_id`, `category`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`key_id`) BUCKETS 3 + PROPERTIES("replication_num" = "1"); + """ + test { + sql """ALTER TABLE ${tableName3} ADD INDEX idx_http_url(http_url) USING NGRAM_BF PROPERTIES("gram_size"="3", "bf_size"="65536") COMMENT 'http_url ngram_bf index'""" + exception "bf_size should be integer and between 64 and 65535" + } }