Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes (#10959)

* Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes
  Signed-off-by: Gao Binlong <gbinlong@amazon.com>
* Add more test
  Signed-off-by: Gao Binlong <gbinlong@amazon.com>
* modify change log
  Signed-off-by: Gao Binlong <gbinlong@amazon.com>
* Fix test failure
  Signed-off-by: Gao Binlong <gbinlong@amazon.com>
* Change the indexAnalyzer used by prefix field
  Signed-off-by: Gao Binlong <gbinlong@amazon.com>
* Skip old version for yaml test
  Signed-off-by: Gao Binlong <gbinlong@amazon.com>
* Optimize some code
  Signed-off-by: Gao Binlong <gbinlong@amazon.com>
* Fix test failure
  Signed-off-by: Gao Binlong <gbinlong@amazon.com>
* Modify yaml test description
  Signed-off-by: Gao Binlong <gbinlong@amazon.com>
* Remove the name parameter for setAnalyzer()
  Signed-off-by: Gao Binlong <gbinlong@amazon.com>
---------
Signed-off-by: Gao Binlong <gbinlong@amazon.com>
(cherry picked from commit 2e63913)
Signed-off-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
opensearch-project · Jul 9, 2024 · 297f2ba · 297f2ba
1 parent 3fde6aa
commit 297f2ba
Show file tree

Hide file tree

Showing 5 changed files with 124 additions and 9 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -47,6 +47,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 - Fix bug in SBP cancellation logic ([#13259](https://github.com/opensearch-project/OpenSearch/pull/13474))
 - Fix handling of Short and Byte data types in ScriptProcessor ingest pipeline ([#14379](https://github.com/opensearch-project/OpenSearch/issues/14379))
 - Switch to iterative version of WKT format parser ([#14086](https://github.com/opensearch-project/OpenSearch/pull/14086))
+- Fix match_phrase_prefix_query not working on text field with multiple values and index_prefixes ([#10959](https://github.com/opensearch-project/OpenSearch/pull/10959))
 - Fix the computed max shards of cluster to avoid int overflow ([#14155](https://github.com/opensearch-project/OpenSearch/pull/14155))
 - Fixed rest-high-level client searchTemplate & mtermVectors endpoints to have a leading slash ([#14465](https://github.com/opensearch-project/OpenSearch/pull/14465))
 - Write shard level metadata blob when snapshotting searchable snapshot indexes ([#13190](https://github.com/opensearch-project/OpenSearch/pull/13190))

diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search/190_index_prefix_search.yml
@@ -10,7 +10,12 @@ setup:
                 index_prefixes:
                   min_chars: 2
                   max_chars: 5
-
+              text_with_pos_inc_gap:
+                type: text
+                position_increment_gap: 201
+                index_prefixes:
+                  min_chars: 2
+                  max_chars: 5
   - do:
       index:
           index:  test
@@ -23,6 +28,18 @@ setup:
         id:     2
         body:   { text: sentence with UPPERCASE WORDS }
 
+  - do:
+      index:
+        index:  test
+        id:     3
+        body:   { text: ["foo", "b-12"] }
+
+  - do:
+      index:
+        index:  test
+        id:     4
+        body:   { text_with_pos_inc_gap: ["foo", "b-12"] }
+
   - do:
       indices.refresh:
         index: [test]
@@ -116,3 +133,36 @@ setup:
               ]
 
   - match: {hits.total: 1}
+
+# related issue: https://github.com/opensearch-project/OpenSearch/issues/9203
+---
+"search index prefixes with multiple values":
+  - skip:
+      version: " - 2.99.99"
+      reason: "the bug was fixed in 3.0.0"
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_phrase_prefix:
+              text: "b-12"
+
+  - match: {hits.total: 1}
+
+---
+"search index prefixes with multiple values and custom position_increment_gap":
+  - skip:
+      version: " - 2.99.99"
+      reason: "the bug was fixed in 3.0.0"
+  - do:
+      search:
+        rest_total_hits_as_int: true
+        index: test
+        body:
+          query:
+            match_phrase_prefix:
+              text_with_pos_inc_gap: "b-12"
+
+  - match: {hits.total: 1}
diff --git a/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java b/server/src/main/java/org/opensearch/index/mapper/TextFieldMapper.java
@@ -449,7 +449,6 @@ protected PrefixFieldMapper buildPrefixMapper(BuilderContext context, FieldType
                 pft.setStoreTermVectorOffsets(true);
             }
             PrefixFieldType prefixFieldType = new PrefixFieldType(tft, fullName + "._index_prefix", indexPrefixes.get());
-            prefixFieldType.setAnalyzer(analyzers.getIndexAnalyzer());
             tft.setPrefixFieldType(prefixFieldType);
             return new PrefixFieldMapper(pft, prefixFieldType);
         }
@@ -523,19 +522,26 @@ private static class PrefixWrappedAnalyzer extends AnalyzerWrapper {
         private final int minChars;
         private final int maxChars;
         private final Analyzer delegate;
+        private final int positionIncrementGap;
 
-        PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars) {
+        PrefixWrappedAnalyzer(Analyzer delegate, int minChars, int maxChars, int positionIncrementGap) {
             super(delegate.getReuseStrategy());
             this.delegate = delegate;
             this.minChars = minChars;
             this.maxChars = maxChars;
+            this.positionIncrementGap = positionIncrementGap;
         }
 
         @Override
         protected Analyzer getWrappedAnalyzer(String fieldName) {
             return delegate;
         }
 
+        @Override
+        public int getPositionIncrementGap(String fieldName) {
+            return positionIncrementGap;
+        }
+
         @Override
         protected TokenStreamComponents wrapComponents(String fieldName, TokenStreamComponents components) {
             TokenFilter filter = new EdgeNGramTokenFilter(components.getTokenStream(), minChars, maxChars, false);
@@ -589,17 +595,18 @@ static final class PrefixFieldType extends StringFieldType {
 
         final int minChars;
         final int maxChars;
-        final TextFieldType parentField;
+        final TextFieldType parent;
 
         PrefixFieldType(TextFieldType parentField, String name, PrefixConfig config) {
             this(parentField, name, config.minChars, config.maxChars);
         }
 
-        PrefixFieldType(TextFieldType parentField, String name, int minChars, int maxChars) {
-            super(name, true, false, false, parentField.getTextSearchInfo(), Collections.emptyMap());
+        PrefixFieldType(TextFieldType parent, String name, int minChars, int maxChars) {
+            super(name, true, false, false, parent.getTextSearchInfo(), Collections.emptyMap());
             this.minChars = minChars;
             this.maxChars = maxChars;
-            this.parentField = parentField;
+            this.parent = parent;
+            setAnalyzer(parent.indexAnalyzer());
         }
 
         @Override
@@ -610,8 +617,13 @@ public ValueFetcher valueFetcher(QueryShardContext context, SearchLookup searchL
         }
 
         void setAnalyzer(NamedAnalyzer delegate) {
+            String analyzerName = delegate.name();
             setIndexAnalyzer(
-                new NamedAnalyzer(delegate.name(), AnalyzerScope.INDEX, new PrefixWrappedAnalyzer(delegate.analyzer(), minChars, maxChars))
+                new NamedAnalyzer(
+                    analyzerName,
+                    AnalyzerScope.INDEX,
+                    new PrefixWrappedAnalyzer(delegate.analyzer(), minChars, maxChars, delegate.getPositionIncrementGap(analyzerName))
+                )
             );
         }
 
@@ -640,7 +652,7 @@ public Query prefixQuery(String value, MultiTermQuery.RewriteMethod method, bool
             Automaton automaton = Operations.concatenate(automata);
             AutomatonQuery query = AutomatonQueries.createAutomatonQuery(new Term(name(), value + "*"), automaton, method);
             return new BooleanQuery.Builder().add(query, BooleanClause.Occur.SHOULD)
-                .add(new TermQuery(new Term(parentField.name(), value)), BooleanClause.Occur.SHOULD)
+                .add(new TermQuery(new Term(parent.name(), value)), BooleanClause.Occur.SHOULD)
                 .build();
         }
 

diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldMapperTests.java
@@ -380,6 +380,57 @@ public void testIndexOptions() throws IOException {
         }
     }
 
+    public void testPositionIncrementGapOnIndexPrefixField() throws IOException {
+        // test default position_increment_gap
+        MapperService mapperService = createMapperService(
+            fieldMapping(b -> b.field("type", "text").field("analyzer", "default").startObject("index_prefixes").endObject())
+        );
+        ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b 12" })));
+
+        withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> {
+            TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator();
+            assertTrue(terms.seekExact(new BytesRef("12")));
+            PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, postings.nextPosition());
+        });
+
+        withLuceneIndex(mapperService, iw -> iw.addDocument(doc.rootDoc()), reader -> {
+            TermsEnum terms = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
+            assertTrue(terms.seekExact(new BytesRef("12")));
+            PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(TextFieldMapper.Defaults.POSITION_INCREMENT_GAP + 2, postings.nextPosition());
+        });
+
+        // test custom position_increment_gap
+        final int positionIncrementGap = randomIntBetween(1, 1000);
+        MapperService mapperService2 = createMapperService(
+            fieldMapping(
+                b -> b.field("type", "text")
+                    .field("position_increment_gap", positionIncrementGap)
+                    .field("analyzer", "default")
+                    .startObject("index_prefixes")
+                    .endObject()
+            )
+        );
+        ParsedDocument doc2 = mapperService2.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b 12" })));
+        withLuceneIndex(mapperService2, iw -> iw.addDocument(doc2.rootDoc()), reader -> {
+            TermsEnum terms = getOnlyLeafReader(reader).terms("field").iterator();
+            assertTrue(terms.seekExact(new BytesRef("12")));
+            PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(positionIncrementGap + 2, postings.nextPosition());
+        });
+        withLuceneIndex(mapperService2, iw -> iw.addDocument(doc2.rootDoc()), reader -> {
+            TermsEnum terms = getOnlyLeafReader(reader).terms("field._index_prefix").iterator();
+            assertTrue(terms.seekExact(new BytesRef("12")));
+            PostingsEnum postings = terms.postings(null, PostingsEnum.POSITIONS);
+            assertEquals(0, postings.nextDoc());
+            assertEquals(positionIncrementGap + 2, postings.nextPosition());
+        });
+    }
+
     public void testDefaultPositionIncrementGap() throws IOException {
         MapperService mapperService = createMapperService(fieldMapping(this::minimalMapping));
         ParsedDocument doc = mapperService.documentMapper().parse(source(b -> b.array("field", new String[] { "a", "b" })));

diff --git a/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java b/server/src/test/java/org/opensearch/index/mapper/TextFieldTypeTests.java
@@ -167,6 +167,7 @@ public void testFuzzyQuery() {
 
     public void testIndexPrefixes() {
         TextFieldType ft = createFieldType(true);
+        ft.setIndexAnalyzer(Lucene.STANDARD_ANALYZER);
         ft.setPrefixFieldType(new TextFieldMapper.PrefixFieldType(ft, "field._index_prefix", 2, 10));
 
         Query q = ft.prefixQuery("goin", CONSTANT_SCORE_REWRITE, false, randomMockShardContext());