Skip to content

Commit

Permalink
Add BytesRefIterator to TermInSetQuery (#13806)
Browse files Browse the repository at this point in the history
TermInSetQuery used to have an accessor to its terms that was removed in #12173
to avoid leaking internal encoding details. This introduces an accessor to the
term data in the query that doesn't expose internals but merely allows iterating
over the decoded BytesRefs, making inspection of the query's content possible again.

Closes #13804
  • Loading branch information
cbuescher authored and javanna committed Sep 19, 2024
1 parent b467a2b commit a7ce346
Show file tree
Hide file tree
Showing 3 changed files with 31 additions and 8 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@ http://s.apache.org/luceneversions
API Changes
---------------------

* GITHUB#13806: Add TermInSetQuery#getBytesRefIterator to be able to iterate over query terms. (Christoph Büscher)

* GITHUB#13469: Expose FlatVectorsFormat as a first-class format; can be configured using a custom Codec. (Michael Sokolov)

* GITHUB#13612: Hunspell: add Suggester#proceedPastRep to avoid losing relevant suggestions. (Peter Gromov)
Expand Down
21 changes: 13 additions & 8 deletions lucene/core/src/java/org/apache/lucene/search/TermInSetQuery.java
Original file line number Diff line number Diff line change
Expand Up @@ -28,13 +28,7 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
import org.apache.lucene.util.BytesRefComparator;
import org.apache.lucene.util.RamUsageEstimator;
import org.apache.lucene.util.StringSorter;
import org.apache.lucene.util.*;
import org.apache.lucene.util.automaton.Automata;
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.ByteRunAutomaton;
Expand Down Expand Up @@ -161,6 +155,16 @@ public long getTermsCount() throws IOException {
return termData.size();
}

/**
 * Returns an iterator over the terms held by this query, intended for query inspection.
 *
 * <p>Every invocation obtains a new iterator from the query's term data; the returned
 * {@link BytesRefIterator} forwards each {@code next()} call to it.
 *
 * @return an iterator that yields the query's terms one at a time
 * @lucene.experimental
 */
public BytesRefIterator getBytesRefIterator() {
  // Only next() is handed out, so callers see a plain BytesRefIterator rather than
  // the underlying TermIterator.
  final TermIterator terms = this.termData.iterator();
  return terms::next;
}

@Override
public void visit(QueryVisitor visitor) {
if (visitor.acceptField(field) == false) {
Expand Down Expand Up @@ -207,7 +211,8 @@ public int hashCode() {
* Returns the terms wrapped in a PrefixCodedTerms.
*
* @deprecated the encoded terms will no longer be exposed in a future major version; this is an
* implementation detail that could change at some point and shouldn't be relied on directly
* implementation detail that could change at some point and shouldn't be relied on directly.
* Call {@link #getBytesRefIterator()} instead to iterate over the terms.
*/
@Deprecated
public PrefixCodedTerms getTermData() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.apache.lucene.tests.util.RamUsageTester;
import org.apache.lucene.tests.util.TestUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefIterator;
import org.apache.lucene.util.IOUtils;
import org.apache.lucene.util.automaton.ByteRunAutomaton;

Expand Down Expand Up @@ -474,4 +475,19 @@ public void consumeTermsMatching(
}
});
}

/**
 * Checks {@code TermInSetQuery#getBytesRefIterator()}: a query built from no terms yields
 * {@code null} right away, and a query built from three terms yields them in order and then
 * {@code null}.
 */
public void testTermsIterator() throws IOException {
  // A query without any terms: the very first next() must already signal exhaustion.
  TermInSetQuery noTermsQuery = new TermInSetQuery("field", Collections.emptyList());
  BytesRefIterator noTerms = noTermsQuery.getBytesRefIterator();
  assertNull(noTerms.next());

  // A query with three terms: each one is returned in turn, followed by null.
  TermInSetQuery threeTermsQuery =
      new TermInSetQuery(
          "field", List.of(newBytesRef("term1"), newBytesRef("term2"), newBytesRef("term3")));
  BytesRefIterator terms = threeTermsQuery.getBytesRefIterator();
  for (String expected : new String[] {"term1", "term2", "term3"}) {
    assertEquals(newBytesRef(expected), terms.next());
  }
  assertNull(terms.next());
}
}

0 comments on commit a7ce346

Please sign in to comment.