Skip to content

Commit

Permalink
Reduce specialization in TopScoreDocCollector. (#14038)
Browse files Browse the repository at this point in the history
The specialization of `SimpleCollector` vs. `PagingCollector` only helps save a
null check, so it's probably not worth the complexity. Benchmarks cannot see a
difference with this change.
  • Loading branch information
jpountz authored Dec 4, 2024
1 parent b758602 commit 3fcadaf
Show file tree
Hide file tree
Showing 3 changed files with 128 additions and 216 deletions.
329 changes: 123 additions & 206 deletions lucene/core/src/java/org/apache/lucene/search/TopScoreDocCollector.java
Original file line number Diff line number Diff line change
Expand Up @@ -29,241 +29,158 @@
* <p><b>NOTE</b>: The values {@link Float#NaN} and {@link Float#NEGATIVE_INFINITY} are not valid
* scores. This collector will not properly collect hits with such scores.
*/
public abstract class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {
public class TopScoreDocCollector extends TopDocsCollector<ScoreDoc> {

/** Scorable leaf collector */
public abstract static class ScorerLeafCollector implements LeafCollector {

protected Scorable scorer;
private final ScoreDoc after;
final int totalHitsThreshold;
final MaxScoreAccumulator minScoreAcc;

@Override
public void setScorer(Scorable scorer) throws IOException {
this.scorer = scorer;
}
// prevents instantiation
TopScoreDocCollector(
int numHits, ScoreDoc after, int totalHitsThreshold, MaxScoreAccumulator minScoreAcc) {
super(new HitQueue(numHits, true));
this.after = after;
this.totalHitsThreshold = totalHitsThreshold;
this.minScoreAcc = minScoreAcc;
}

static class SimpleTopScoreDocCollector extends TopScoreDocCollector {

SimpleTopScoreDocCollector(
int numHits, int totalHitsThreshold, MaxScoreAccumulator minScoreAcc) {
super(numHits, totalHitsThreshold, minScoreAcc);
}

@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
// reset the minimum competitive score
docBase = context.docBase;
minCompetitiveScore = 0f;

return new ScorerLeafCollector() {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
if (minScoreAcc == null) {
updateMinCompetitiveScore(scorer);
} else {
updateGlobalMinCompetitiveScore(scorer);
@Override
protected int topDocsSize() {
// Note: this relies on sentinel values having Integer.MAX_VALUE as a doc ID.
int[] validTopHitCount = new int[1];
pq.forEach(
scoreDoc -> {
if (scoreDoc.doc != Integer.MAX_VALUE) {
validTopHitCount[0]++;
}
}

@Override
public void collect(int doc) throws IOException {
float score = scorer.score();

int hitCountSoFar = ++totalHits;
});
return validTopHitCount[0];
}

if (minScoreAcc != null && (hitCountSoFar & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}
@Override
protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
return results == null
? new TopDocs(new TotalHits(totalHits, totalHitsRelation), new ScoreDoc[0])
: new TopDocs(new TotalHits(totalHits, totalHitsRelation), results);
}

if (score <= pqTop.score) {
// Note: for queries that match lots of hits, this is the common case: most hits are not
// competitive.
if (hitCountSoFar == totalHitsThreshold + 1) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}
// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
} else {
collectCompetitiveHit(doc, score);
}
}
@Override
public ScoreMode scoreMode() {
return totalHitsThreshold == Integer.MAX_VALUE ? ScoreMode.COMPLETE : ScoreMode.TOP_SCORES;
}

private void collectCompetitiveHit(int doc, float score) throws IOException {
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
updateMinCompetitiveScore(scorer);
}
};
@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
final int docBase = context.docBase;
final ScoreDoc after = this.after;
final float afterScore;
final int afterDoc;
if (after == null) {
afterScore = Float.POSITIVE_INFINITY;
afterDoc = DocIdSetIterator.NO_MORE_DOCS;
} else {
afterScore = after.score;
afterDoc = after.doc - context.docBase;
}
}

static class PagingTopScoreDocCollector extends TopScoreDocCollector {
return new LeafCollector() {

private final ScoreDoc after;
private Scorable scorer;
// HitQueue implements getSentinelObject to return a ScoreDoc, so we know
// that at this point top() is already initialized.
private ScoreDoc pqTop = pq.top();
private float minCompetitiveScore;

PagingTopScoreDocCollector(
int numHits, ScoreDoc after, int totalHitsThreshold, MaxScoreAccumulator minScoreAcc) {
super(numHits, totalHitsThreshold, minScoreAcc);
this.after = after;
}
@Override
public void setScorer(Scorable scorer) throws IOException {
this.scorer = scorer;
if (minScoreAcc == null) {
updateMinCompetitiveScore(scorer);
} else {
updateGlobalMinCompetitiveScore(scorer);
}
}

@Override
protected int topDocsSize() {
// Note: this relies on sentinel values having Integer.MAX_VALUE as a doc ID.
int[] validTopHitCount = new int[1];
pq.forEach(
scoreDoc -> {
if (scoreDoc.doc != Integer.MAX_VALUE) {
validTopHitCount[0]++;
}
});
return validTopHitCount[0];
}
@Override
public void collect(int doc) throws IOException {
float score = scorer.score();

@Override
protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
return results == null
? new TopDocs(new TotalHits(totalHits, totalHitsRelation), new ScoreDoc[0])
: new TopDocs(new TotalHits(totalHits, totalHitsRelation), results);
}
int hitCountSoFar = ++totalHits;

@Override
public LeafCollector getLeafCollector(LeafReaderContext context) throws IOException {
docBase = context.docBase;
final int afterDoc = after.doc - context.docBase;
minCompetitiveScore = 0f;
if (minScoreAcc != null && (hitCountSoFar & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
}

return new ScorerLeafCollector() {
@Override
public void setScorer(Scorable scorer) throws IOException {
super.setScorer(scorer);
if (minScoreAcc == null) {
if (after != null && (score > afterScore || (score == afterScore && doc <= afterDoc))) {
// hit was collected on a previous page
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
} else {
updateGlobalMinCompetitiveScore(scorer);
}
return;
}

@Override
public void collect(int doc) throws IOException {
float score = scorer.score();

int hitCountSoFar = ++totalHits;

if (minScoreAcc != null && (hitCountSoFar & minScoreAcc.modInterval) == 0) {
updateGlobalMinCompetitiveScore(scorer);
if (score <= pqTop.score) {
// Note: for queries that match lots of hits, this is the common case: most hits are not
// competitive.
if (hitCountSoFar == totalHitsThreshold + 1) {
// we just exceeded totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}

float afterScore = after.score;
if (score > afterScore || (score == afterScore && doc <= afterDoc)) {
// hit was collected on a previous page
if (totalHitsRelation == TotalHits.Relation.EQUAL_TO) {
// we just reached totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}
return;
}
// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
} else {
collectCompetitiveHit(doc, score);
}
}

if (score <= pqTop.score) {
// Note: for queries that match lots of hits, this is the common case: most hits are not
// competitive.
if (hitCountSoFar == totalHitsThreshold + 1) {
// we just exceeded totalHitsThreshold, we can start setting the min
// competitive score now
updateMinCompetitiveScore(scorer);
}
private void collectCompetitiveHit(int doc, float score) throws IOException {
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
updateMinCompetitiveScore(scorer);
}

// Since docs are returned in-order (i.e., increasing doc Id), a document
// with equal score to pqTop.score cannot compete since HitQueue favors
// documents with lower doc Ids. Therefore reject those docs too.
} else {
collectCompetitiveHit(doc, score);
private void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
assert minScoreAcc != null;
long maxMinScore = minScoreAcc.getRaw();
if (maxMinScore != Long.MIN_VALUE) {
// since we tie-break on doc id and collect in doc id order we can require
// the next float if the global minimum score is set on a document id that is
// smaller than the ids in the current leaf
float score = MaxScoreAccumulator.toScore(maxMinScore);
score = docBase >= MaxScoreAccumulator.docId(maxMinScore) ? Math.nextUp(score) : score;
if (score > minCompetitiveScore) {
scorer.setMinCompetitiveScore(score);
minCompetitiveScore = score;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
}
}

private void collectCompetitiveHit(int doc, float score) throws IOException {
pqTop.doc = doc + docBase;
pqTop.score = score;
pqTop = pq.updateTop();
updateMinCompetitiveScore(scorer);
}
};
}
}

int docBase;
ScoreDoc pqTop;
final int totalHitsThreshold;
final MaxScoreAccumulator minScoreAcc;
float minCompetitiveScore;

// prevents instantiation
TopScoreDocCollector(int numHits, int totalHitsThreshold, MaxScoreAccumulator minScoreAcc) {
super(new HitQueue(numHits, true));

// HitQueue implements getSentinelObject to return a ScoreDoc, so we know
// that at this point top() is already initialized.
pqTop = pq.top();
this.totalHitsThreshold = totalHitsThreshold;
this.minScoreAcc = minScoreAcc;
}

@Override
protected TopDocs newTopDocs(ScoreDoc[] results, int start) {
if (results == null) {
return EMPTY_TOPDOCS;
}

return new TopDocs(new TotalHits(totalHits, totalHitsRelation), results);
}

@Override
public ScoreMode scoreMode() {
return totalHitsThreshold == Integer.MAX_VALUE ? ScoreMode.COMPLETE : ScoreMode.TOP_SCORES;
}

protected void updateGlobalMinCompetitiveScore(Scorable scorer) throws IOException {
assert minScoreAcc != null;
long maxMinScore = minScoreAcc.getRaw();
if (maxMinScore != Long.MIN_VALUE) {
// since we tie-break on doc id and collect in doc id order we can require
// the next float if the global minimum score is set on a document id that is
// smaller than the ids in the current leaf
float score = MaxScoreAccumulator.toScore(maxMinScore);
score = docBase >= MaxScoreAccumulator.docId(maxMinScore) ? Math.nextUp(score) : score;
if (score > minCompetitiveScore) {
scorer.setMinCompetitiveScore(score);
minCompetitiveScore = score;
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
}
}
}

protected void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (totalHits > totalHitsThreshold) {
// since we tie-break on doc id and collect in doc id order, we can require
// the next float
// pqTop is never null since TopScoreDocCollector fills the priority queue with sentinel
// values
// if the top element is a sentinel value, its score will be -Infty and the below logic is
// still valid
float localMinScore = Math.nextUp(pqTop.score);
if (localMinScore > minCompetitiveScore) {
scorer.setMinCompetitiveScore(localMinScore);
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
minCompetitiveScore = localMinScore;
if (minScoreAcc != null) {
// we don't use the next float but we register the document id so that other leaves or
// leaf partitions can require it if they are after the current maximum
minScoreAcc.accumulate(pqTop.doc, pqTop.score);
private void updateMinCompetitiveScore(Scorable scorer) throws IOException {
if (totalHits > totalHitsThreshold) {
// since we tie-break on doc id and collect in doc id order, we can require the next float
// pqTop is never null since TopScoreDocCollector fills the priority queue with sentinel
// values if the top element is a sentinel value, its score will be -Infty and the below
// logic is still valid
float localMinScore = Math.nextUp(pqTop.score);
if (localMinScore > minCompetitiveScore) {
scorer.setMinCompetitiveScore(localMinScore);
totalHitsRelation = TotalHits.Relation.GREATER_THAN_OR_EQUAL_TO;
minCompetitiveScore = localMinScore;
if (minScoreAcc != null) {
// we don't use the next float but we register the document id so that other leaves or
// leaf partitions can require it if they are after the current maximum
minScoreAcc.accumulate(pqTop.doc, pqTop.score);
}
}
}
}
}
};
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -125,13 +125,7 @@ public TopScoreDocCollectorManager(int numHits, int totalHitsThreshold) {

@Override
public TopScoreDocCollector newCollector() {
if (after == null) {
return new TopScoreDocCollector.SimpleTopScoreDocCollector(
numHits, totalHitsThreshold, minScoreAcc);
} else {
return new TopScoreDocCollector.PagingTopScoreDocCollector(
numHits, after, totalHitsThreshold, minScoreAcc);
}
return new TopScoreDocCollector(numHits, after, totalHitsThreshold, minScoreAcc);
}

@Override
Expand Down
Loading

0 comments on commit 3fcadaf

Please sign in to comment.