From 2e639131b3bb88316f53bfeb9262cbcd81606d50 Mon Sep 17 00:00:00 2001 From: gaobinlong Date: Tue, 9 Jul 2024 19:29:44 +0800 Subject: [PATCH] Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes (#10959) * Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes Signed-off-by: Gao Binlong * Add more test Signed-off-by: Gao Binlong * modify change log Signed-off-by: Gao Binlong * Fix test failure Signed-off-by: Gao Binlong * Change the indexAnalyzer used by prefix field Signed-off-by: Gao Binlong * Skip old version for yaml test Signed-off-by: Gao Binlong * Optimize some code Signed-off-by: Gao Binlong * Fix test failure Signed-off-by: Gao Binlong * Modify yaml test description Signed-off-by: Gao Binlong * Remove the name parameter for setAnalyzer() Signed-off-by: Gao Binlong --------- Signed-off-by: Gao Binlong --- CHANGELOG.md | 1 + .../test/search/190_index_prefix_search.yml | 52 ++++++++++++++++++- .../index/mapper/TextFieldMapper.java | 28 +++++++--- .../index/mapper/TextFieldMapperTests.java | 51 ++++++++++++++++++ .../index/mapper/TextFieldTypeTests.java | 1 + 5 files changed, 124 insertions(+), 9 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d0990db31d20..2cffa99fb66a5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -49,6 +49,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Fix bug in SBP cancellation logic ([#13259](https://github.com/opensearch-project/OpenSearch/pull/13474)) - Fix handling of Short and Byte data types in ScriptProcessor ingest pipeline ([#14379](https://github.com/opensearch-project/OpenSearch/issues/14379)) - Switch to iterative version of WKT format parser ([#14086](https://github.com/opensearch-project/OpenSearch/pull/14086)) +- Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes ([#10959](https://github.com/opensearch-project/OpenSearch/pull/10959)) - Fix the computed max shards of cluster to avoid int overflow ([#14155](https://github.com/opensearch-project/OpenSearch/pull/14155)) - Fixed rest-high-level client searchTemplate & mtermVectors endpoints to have a leading slash ([#14465](https://github.com/opensearch-project/OpenSearch/pull/14465)) - Write shard level metadata blob when snapshotting searchable snapshot indexes ([#13190](https://github.com/opensearch-project/OpenSearch/pull/13190)) diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml index 25d3dd160e031..8b031c132f979 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml @@ -10,7 +10,12 @@ setup: index_prefixes: min_chars: 2 max_chars: 5 - + text_with_pos_inc_gap: + type: text + position_increment_gap: 201 + index_prefixes: + min_chars: 2 + max_chars: 5 - do: index: index: test @@ -23,6 +28,18 @@ setup: id: 2 body: { text: sentence with UPPERCASE WORDS } + - do: + index: + index: test + id: 3 + body: { text: ["foo", "b-12"] } + + - do: + index: + index: test + id: 4 + body: { text_with_pos_inc_gap: ["foo", "b-12"] } + - do: indices.refresh: index: [test] @@ -116,3 +133,36 @@ setup: ] - match: {hits.total: 1} + +# related issue: https://github.com/opensearch-project/OpenSearch/issues/9203 +--- +"search index prefixes with multiple values": + - skip: + version: " - 2.99.99" + reason: "the bug was fixed in 3.0.0" + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + text: "b-12" + + - match: {hits.total: 1} + +--- +"search index prefixes with multiple values and custom position_increment_gap": + - skip: + version: " - 2.99.99" + reason: "the bug was fixed in 3.0.0" + - do: + search: + rest_total_hits_as_int: true + index: test + body: + query: + match_phrase_prefix: + text_with_pos_inc_gap: "b-12" + + - match: {hits.total: 1} diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java index d0e041e68a81d..ba053a3aeee1d 100644 --- a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java +++ b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java @@ -448,7 +448,6 @@ protected PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType pft.setStoreTermVectorOffsets(true); } PrefixFieldType prefixFieldType = new PrefixFieldType(tft, fullName + "._index_prefix", indexPrefixes.get()); - prefixFieldType.setAnalyzer(analyzers.getIndexAnalyzer()); tft.setPrefixFieldType(prefixFieldType); return new PrefixFieldMapper(pft, prefixFieldType); } @@ -522,12 +521,14 @@ private static class PrefixWrappedAnalyzer extends AnalyzerWrapper { private final int minChars; private final int maxChars; private final Analyzer delegate; + private final int positionIncrementGap; - PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars) { + PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars, int positionIncrementGap) { super(delegate.getReuseStrategy()); this.delegate = delegate; this.minChars = minChars; this.maxChars = maxChars; + this.positionIncrementGap = positionIncrementGap; } @Override @@ -535,6 +536,11 @@ protected Analyzer getWrappedAnalyzer(String fieldName) { return delegate; } + @Override + public int getPositionIncrementGap(String fieldName) { + return positionIncrementGap; + } + @Override protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) { TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars, false); @@ -588,17 +594,18 @@ static final class PrefixFieldType extends StringFieldType { final int minChars; final int maxChars; - final TextFieldType parentField; + final TextFieldType parent; PrefixFieldType(TextFieldType parentField, String name, PrefixConfig config) { this(parentField, name, config.minChars, config.maxChars); } - PrefixFieldType(TextFieldType parentField, String name, int minChars, int maxChars) { - super(name, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap()); + PrefixFieldType(TextFieldType parent, String name, int minChars, int maxChars) { + super(name, true, false, false, parent.getTextSearchInfo(), Collections.emptyMap()); this.minChars = minChars; this.maxChars = maxChars; - this.parentField = parentField; + this.parent = parent; + setAnalyzer(parent.indexAnalyzer()); } @Override @@ -609,8 +616,13 @@ public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchL } void setAnalyzer(NamedAnalyzer delegate) { + String analyzerName = delegate.name(); setIndexAnalyzer( - new NamedAnalyzer(delegate.name(), AnalyzerScope.INDEX, new PrefixWrappedAnalyzer(delegate.analyzer(), minChars, maxChars)) + new NamedAnalyzer( + analyzerName, + AnalyzerScope.INDEX, + new PrefixWrappedAnalyzer(delegate.analyzer(), minChars, maxChars, delegate.getPositionIncrementGap(analyzerName)) + ) ); } @@ -639,7 +651,7 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool Automaton automaton = Operations.concatenate(automata); AutomatonQuery query = AutomatonQueries.createAutomatonQuery(new Term(name(), value + "*"), automaton, method); return new BooleanQuery.Builder().add(query, BooleanClause.Occur.SHOULD) - .add(new TermQuery(new Term(parentField.name(), value)), BooleanClause.Occur.SHOULD) + .add(new TermQuery(new Term(parent.name(), value)), BooleanClause.Occur.SHOULD) .build(); } diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java index a22bfa5e845b1..0253caea9759d 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java @@ -380,6 +380,57 @@ public void testIndexOptions() throws IOException { } } + public void testPositionIncrementGapOnIndexPrefixField() throws IOException { + // test default position_increment_gap + MapperService mapperService = createMapperService( + fieldMapping(b -> b.field("type", "text").field("analyzer", "default").startObject("index_prefixes").endObject()) + ); + ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b 12" }))); + + withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> { + TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator(); + assertTrue(terms.seekExact(new BytesRef("12"))); + PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS); + assertEquals(0, postings.nextDoc()); + assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, postings.nextPosition()); + }); + + withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> { + TermsEnum terms = getOnlyLeafReader(reader).terms("field._index_prefix").iterator(); + assertTrue(terms.seekExact(new BytesRef("12"))); + PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS); + assertEquals(0, postings.nextDoc()); + assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, postings.nextPosition()); + }); + + // test custom position_increment_gap + final int positionIncrementGap = randomIntBetween(1, 1000); + MapperService mapperService2 = createMapperService( + fieldMapping( + b -> b.field("type", "text") + .field("position_increment_gap", positionIncrementGap) + .field("analyzer", "default") + .startObject("index_prefixes") + .endObject() + ) + ); + ParsedDocument doc2 = mapperService2.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b 12" }))); + withLuceneIndex(mapperService2, iw -> iw.addDocument(doc2.rootDoc()), reader -> { + TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator(); + assertTrue(terms.seekExact(new BytesRef("12"))); + PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS); + assertEquals(0, postings.nextDoc()); + assertEquals(positionIncrementGap + 2, postings.nextPosition()); + }); + withLuceneIndex(mapperService2, iw -> iw.addDocument(doc2.rootDoc()), reader -> { + TermsEnum terms = getOnlyLeafReader(reader).terms("field._index_prefix").iterator(); + assertTrue(terms.seekExact(new BytesRef("12"))); + PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS); + assertEquals(0, postings.nextDoc()); + assertEquals(positionIncrementGap + 2, postings.nextPosition()); + }); + } + public void testDefaultPositionIncrementGap() throws IOException { MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping)); ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b" }))); diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java index 9c177bbec61fd..e672f94819541 100644 --- a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java +++ b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java @@ -167,6 +167,7 @@ public void testFuzzyQuery() { public void testIndexPrefixes() { TextFieldType ft = createFieldType(true); + ft.setIndexAnalyzer(Lucene.STANDARD_ANALYZER); ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft, "field._index_prefix", 2, 10)); Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, false, randomMockShardContext());