Skip to content

Commit

Permalink
[opt](inverted index) ignore_above only affects untokenized strings
Browse files Browse the repository at this point in the history
  • Loading branch information
qidaye committed Dec 21, 2023
1 parent 5c469a8 commit 61497eb
Show file tree
Hide file tree
Showing 9 changed files with 26 additions and 20 deletions.
14 changes: 10 additions & 4 deletions be/src/olap/rowset/segment_v2/inverted_index_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -299,8 +299,11 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
auto ignore_above_value =
get_parser_ignore_above_value_from_properties(_index_meta->properties());
auto ignore_above = std::stoi(ignore_above_value);
if (v->get_size() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most 256, but got "
// only ignore_above UNTOKENIZED strings
if (_parser_type == InvertedIndexParserType::PARSER_NONE &&
v->get_size() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most "
<< ignore_above_value << ", but got "
<< "value length:" << v->get_size() << ", ignore this value";
new_fulltext_field(empty_value.c_str(), 0);
RETURN_IF_ERROR(add_null_document());
Expand Down Expand Up @@ -349,8 +352,11 @@ class InvertedIndexColumnWriterImpl : public InvertedIndexColumnWriter {
auto ignore_above_value =
get_parser_ignore_above_value_from_properties(_index_meta->properties());
auto ignore_above = std::stoi(ignore_above_value);
if (value.length() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most 256, but got "
// only ignore_above UNTOKENIZED strings
if (_parser_type == InvertedIndexParserType::PARSER_NONE &&
value.length() > ignore_above) {
VLOG_DEBUG << "fulltext index value length can be at most "
<< ignore_above_value << ", but got "
<< "value length:" << value.length() << ", ignore this value";
new_fulltext_field(empty_value.c_str(), 0);
RETURN_IF_ERROR(add_null_document());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t1_dk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
DUPLICATE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t1_uk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
UNIQUE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t2_dk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
DUPLICATE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t2_uk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
UNIQUE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t3_dk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
DUPLICATE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t3_uk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
UNIQUE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t4_dk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
DUPLICATE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ CREATE TABLE IF NOT EXISTS large_records_t4_uk (
FTS_DOC_ID BIGINT NOT NULL,
a TEXT,
b TEXT,
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard", "ignore_above"="2000") COMMENT 'b_idx'
INDEX a_idx (a) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'a_idx',
INDEX b_idx (b) USING INVERTED PROPERTIES("parser"="standard") COMMENT 'b_idx'
)
UNIQUE KEY(FTS_DOC_ID)
DISTRIBUTED BY HASH(FTS_DOC_ID) BUCKETS 3
Expand Down

0 comments on commit 61497eb

Please sign in to comment.