apache · dsmiley · Oct 23, 2021 · Oct 6, 2021 · Oct 22, 2021 · Oct 22, 2021
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
@@ -160,6 +160,9 @@ API Changes
 
 * LUCENE-9325: Sort is now final, and the `setSort()` method has been removed (Alan Woodward)
 
+* LUCENE-9431: The UnifiedHighlighter's WEIGHT_MATCHES flag is now set by default, provided its
+  requirements are met.  It can be disabled via over-riding getFlags (Animesh Pandey, David Smiley)
+
 * LUCENE-10158: Add a new interface Unwrappable to the utils package to allow code to
   unwrap wrappers/delegators that are added by Lucene's testing framework. This will allow
   testing new MMapDirectory implementation based on JDK Project Panama. (Uwe Schindler)

diff --git a/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java b/lucene/highlighter/src/java/org/apache/lucene/search/uhighlight/UnifiedHighlighter.java
@@ -86,7 +86,7 @@
  *   <li>{@link #getFormatter(String)}: Customize how snippets are formatted.
  * </ul>
  *
- * <p>This is thread-safe.
+ * <p>This is thread-safe, notwithstanding the setters.
  *
  * @lucene.experimental
  */
@@ -823,6 +823,7 @@ protected static BytesRef[] filterExtractedTerms(
     return filteredTerms.toArray(new BytesRef[filteredTerms.size()]);
   }
 
+  /** Customize the highlighting flags to use by field. */
   protected Set<HighlightFlag> getFlags(String field) {
     Set<HighlightFlag> highlightFlags = EnumSet.noneOf(HighlightFlag.class);
     if (shouldHandleMultiTermQuery(field)) {
@@ -834,6 +835,11 @@ protected Set<HighlightFlag> getFlags(String field) {
     if (shouldPreferPassageRelevancyOverSpeed(field)) {
       highlightFlags.add(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED);
     }
+    if (highlightFlags.contains(HighlightFlag.MULTI_TERM_QUERY)
+        && highlightFlags.contains(HighlightFlag.PHRASES)
+        && highlightFlags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED)) {
+      highlightFlags.add(HighlightFlag.WEIGHT_MATCHES);
+    }
     return highlightFlags;
   }
 
@@ -1168,9 +1174,11 @@ public enum HighlightFlag {
 
     /**
      * Internally use the {@link Weight#matches(LeafReaderContext, int)} API for highlighting. It's
-     * more accurate to the query, though might not calculate passage relevancy as well. Use of this
-     * flag requires {@link #MULTI_TERM_QUERY} and {@link #PHRASES}. {@link
-     * #PASSAGE_RELEVANCY_OVER_SPEED} will be ignored. False by default.
+     * more accurate to the query, and the snippets can be a little different for phrases because
+     * the whole phrase is marked up instead of each word. The passage relevancy calculation can be
+     * different (maybe worse?) and it's slower when highlighting many fields. Use of this flag
+     * requires {@link #MULTI_TERM_QUERY} and {@link #PHRASES} and {@link
+     * #PASSAGE_RELEVANCY_OVER_SPEED}. True by default because those booleans are true by default.
      */
     WEIGHT_MATCHES
 

diff --git a/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java b/lucene/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighter.java
@@ -460,6 +460,25 @@ public void testBuddhism() throws Exception {
     ir.close();
   }
 
+  public void testHighlighterDefaultFlags() throws Exception {
+    RandomIndexWriter iw = new RandomIndexWriter(random(), dir, indexAnalyzer);
+    Document document = new Document();
+    document.add(new Field("body", "test body", fieldType));
+    iw.addDocument(document);
+    IndexReader ir = iw.getReader();
+    iw.close();
+    IndexSearcher searcher = newSearcher(ir);
+    UnifiedHighlighter highlighter = new UnifiedHighlighter(searcher, indexAnalyzer);
+    Set<HighlightFlag> flags = highlighter.getFlags("body");
+    assertTrue(flags.contains(HighlightFlag.PHRASES));
+    assertTrue(flags.contains(HighlightFlag.MULTI_TERM_QUERY));
+    assertTrue(flags.contains(HighlightFlag.PASSAGE_RELEVANCY_OVER_SPEED));
+    assertTrue(flags.contains(HighlightFlag.WEIGHT_MATCHES));
+    // if more flags are added, bump the number below and add an assertTrue or assertFalse above
+    assertEquals(4, HighlightFlag.values().length);
+    ir.close();
+  }
+
   public void testCuriousGeorge() throws Exception {
     String text =
         "It’s the formula for success for preschoolers—Curious George and fire trucks! "

diff --git a/...e/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java b/...e/highlighter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterMTQ.java
@@ -778,10 +778,15 @@ public Object format(Passage[] passages, String content) {
     snippets = highlighter.highlight("body", query, topDocs);
     assertEquals(1, snippets.length);
 
+    // All flags are enabled.
     assertEquals(
-        "<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(body:zentence~~2)</b> document.",
+        "" + highlighter.getFlags("body"),
+        HighlightFlag.values().length,
+        highlighter.getFlags("body").size());
+    assertEquals(
+        "" + highlighter.getFlags("title"),
+        "<b>Test(body:te*)</b> a <b>one(body:*one*)</b> <b>sentence(sentence)</b> document.",
         snippets[0]);
-
     ir.close();
   }
 

diff --git a/...ter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermIntervals.java b/...ter/src/test/org/apache/lucene/search/uhighlight/TestUnifiedHighlighterTermIntervals.java
@@ -27,7 +27,6 @@
 import java.util.EnumSet;
 import java.util.List;
 import java.util.Map;
-import java.util.Set;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import org.apache.lucene.analysis.Analyzer;
@@ -87,47 +86,10 @@ public void doAfter() throws IOException {
 
   static UnifiedHighlighter randomUnifiedHighlighter(
       IndexSearcher searcher, Analyzer indexAnalyzer) {
-    return randomUnifiedHighlighter(
+    return TestUnifiedHighlighter.randomUnifiedHighlighter(
         searcher, indexAnalyzer, EnumSet.noneOf(HighlightFlag.class), null);
   }
 
-  static UnifiedHighlighter randomUnifiedHighlighter(
-      IndexSearcher searcher,
-      Analyzer indexAnalyzer,
-      EnumSet<HighlightFlag> mandatoryFlags,
-      Boolean requireFieldMatch) {
-    final UnifiedHighlighter uh =
-        new UnifiedHighlighter(searcher, indexAnalyzer) {
-          Set<HighlightFlag> flags; // consistently random set of flags for this test run
-
-          @Override
-          protected Set<HighlightFlag> getFlags(String field) {
-            if (flags != null) {
-              return flags;
-            }
-            final EnumSet<HighlightFlag> result = EnumSet.copyOf(mandatoryFlags);
-            int r = random().nextInt();
-            for (HighlightFlag highlightFlag : HighlightFlag.values()) {
-              if (((1 << highlightFlag.ordinal()) & r) == 0) {
-                result.add(highlightFlag);
-              }
-            }
-            if (result.contains(HighlightFlag.WEIGHT_MATCHES)) {
-              // these two are required for WEIGHT_MATCHES
-              result.add(HighlightFlag.MULTI_TERM_QUERY);
-              result.add(HighlightFlag.PHRASES);
-            }
-            return flags = result;
-          }
-        };
-    uh.setCacheFieldValCharsThreshold(random().nextInt(100));
-    if (requireFieldMatch == Boolean.FALSE
-        || (requireFieldMatch == null && random().nextBoolean())) {
-      uh.setFieldMatcher(f -> true); // requireFieldMatch==false
-    }
-    return uh;
-  }
-
   //
   //  Tests below were ported from the PostingsHighlighter. Possibly augmented.  Far below are newer
   // tests.
@@ -1033,15 +995,15 @@ public void testMatchesSlopBug() throws IOException {
     assertEquals(1, topDocs.totalHits.value);
     String[] snippets = highlighter.highlight("title", query, topDocs, 10);
     assertEquals(1, snippets.length);
-    if (highlighter.getFlags("title").contains(HighlightFlag.WEIGHT_MATCHES)) {
-      assertEquals(
-          "" + highlighter.getFlags("title"), "<b>This is the title field</b>.", snippets[0]);
-    } else {
-      assertEquals(
-          "" + highlighter.getFlags("title"),
-          "<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.",
-          snippets[0]);
-    }
+    // All flags are enabled.
+    assertEquals(
+        "" + highlighter.getFlags("title"),
+        HighlightFlag.values().length,
+        highlighter.getFlags("title").size());
+    assertEquals(
+        "" + highlighter.getFlags("title"),
+        "<b>This</b> <b>is</b> <b>the</b> title <b>field</b>.",
+        snippets[0]);
     ir.close();
   }