Skip to content

Commit

Permalink
Cleanup redundant allocations and code around Comparator use (#13795)
Browse files Browse the repository at this point in the history
Noticed some visible allocations in CompetitiveImpactAccumulator
during benchmarking and fixed the needless allocation for the comparator
in that class as well as a couple other similar spots where needless
classes and/or objects could easily be replaced by more lightweight
solutions.
  • Loading branch information
original-brownbear authored and jpountz committed Sep 17, 2024
1 parent fef2560 commit 71ca6b4
Show file tree
Hide file tree
Showing 21 changed files with 114 additions and 264 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,7 @@ List<String> suggest(String word, WordCase originalCase, Set<Suggestion> prevSug

private List<Weighted<Root<String>>> findSimilarDictionaryEntries(
String word, WordCase originalCase) {
Comparator<Weighted<Root<String>>> natural = Comparator.naturalOrder();
PriorityQueue<Weighted<Root<String>>> roots = new PriorityQueue<>(natural.reversed());
PriorityQueue<Weighted<Root<String>>> roots = new PriorityQueue<>(Comparator.reverseOrder());

char[] excludeFlags = dictionary.allNonSuggestibleFlags();
FlagEnumerator.Lookup flagLookup = dictionary.flagLookup;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,8 +17,6 @@
package org.apache.lucene.analysis.hunspell;

import java.io.IOException;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
Expand Down Expand Up @@ -117,7 +115,16 @@ public boolean incrementToken() throws IOException {
}

if (longestOnly && buffer.size() > 1) {
Collections.sort(buffer, lengthComparator);
buffer.sort(
(o1, o2) -> {
int cmp = Integer.compare(o2.length, o1.length);
if (cmp == 0) {
// tie break on text
return o2.compareTo(o1);
} else {
return cmp;
}
});
}

CharsRef stem = buffer.remove(0);
Expand All @@ -139,18 +146,4 @@ public void reset() throws IOException {
super.reset();
buffer = null;
}

static final Comparator<CharsRef> lengthComparator =
new Comparator<CharsRef>() {
@Override
public int compare(CharsRef o1, CharsRef o2) {
int cmp = Integer.compare(o2.length, o1.length);
if (cmp == 0) {
// tie break on text
return o2.compareTo(o1);
} else {
return cmp;
}
}
};
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.analysis.CharArraySet;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
Expand Down Expand Up @@ -147,26 +146,23 @@ private final boolean buildSingleOutputToken() throws IOException {

Arrays.sort(
items,
new Comparator<Object>() {
@Override
public int compare(Object o1, Object o2) {
char[] v1 = (char[]) o1;
char[] v2 = (char[]) o2;
int len1 = v1.length;
int len2 = v2.length;
int lim = Math.min(len1, len2);

int k = 0;
while (k < lim) {
char c1 = v1[k];
char c2 = v2[k];
if (c1 != c2) {
return c1 - c2;
}
k++;
(o1, o2) -> {
char[] v1 = (char[]) o1;
char[] v2 = (char[]) o2;
int len1 = v1.length;
int len2 = v2.length;
int lim = Math.min(len1, len2);

int k = 0;
while (k < lim) {
char c1 = v1[k];
char c2 = v2[k];
if (c1 != c2) {
return c1 - c2;
}
return len1 - len2;
k++;
}
return len1 - len2;
});

// TODO lets append directly to termAttribute?
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,6 @@
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.regex.Pattern;
import org.apache.lucene.analysis.util.CSVUtil;
Expand Down Expand Up @@ -86,14 +84,7 @@ private UserDictionary(List<String[]> featureEntries) throws IOException {
// TODO: should we allow multiple segmentations per input 'phrase'?
// the old treemap didn't support this either, and i'm not sure if it's needed/useful?

Collections.sort(
featureEntries,
new Comparator<String[]>() {
@Override
public int compare(String[] left, String[] right) {
return left[0].compareTo(right[0]);
}
});
featureEntries.sort((left, right) -> left[0].compareTo(right[0]));

List<String> data = new ArrayList<>(featureEntries.size());
List<int[]> segmentations = new ArrayList<>(featureEntries.size());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import java.util.ArrayList;
import java.util.Arrays;
import java.util.Comparator;
import java.util.Iterator;
import java.util.List;
import java.util.TreeSet;
Expand All @@ -39,20 +38,17 @@ public final class CompetitiveImpactAccumulator {
/** Sole constructor. */
public CompetitiveImpactAccumulator() {
maxFreqs = new int[256];
Comparator<Impact> comparator =
new Comparator<Impact>() {
@Override
public int compare(Impact o1, Impact o2) {
// greater freqs compare greater
int cmp = Integer.compare(o1.freq, o2.freq);
if (cmp == 0) {
// greater norms compare lower
cmp = Long.compareUnsigned(o2.norm, o1.norm);
}
return cmp;
}
};
otherFreqNormPairs = new TreeSet<>(comparator);
otherFreqNormPairs =
new TreeSet<>(
(o1, o2) -> {
// greater freqs compare greater
int cmp = Integer.compare(o1.freq, o2.freq);
if (cmp == 0) {
// greater norms compare lower
cmp = Long.compareUnsigned(o2.norm, o1.norm);
}
return cmp;
});
}

/** Reset to the same state it was in after creation. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package org.apache.lucene.document;

import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.MatchNoDocsQuery;
Expand Down Expand Up @@ -238,14 +237,7 @@ public static Query newSetQuery(String field, byte[]... values) {

// Don't unexpectedly change the user's incoming values array:
byte[][] sortedValues = values.clone();
Arrays.sort(
sortedValues,
new Comparator<byte[]>() {
@Override
public int compare(byte[] a, byte[] b) {
return Arrays.compareUnsigned(a, 0, a.length, b, 0, b.length);
}
});
Arrays.sort(sortedValues, (a, b) -> Arrays.compareUnsigned(a, 0, a.length, b, 0, b.length));

final BytesRef encoded = new BytesRef(new byte[bytesPerDim]);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import java.net.InetAddress;
import java.net.UnknownHostException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.index.PointValues;
import org.apache.lucene.search.PointInSetQuery;
import org.apache.lucene.search.PointRangeQuery;
Expand Down Expand Up @@ -288,14 +287,7 @@ public static Query newSetQuery(String field, InetAddress... values) {
sortedValues[i] = encode(values[i]);
}

Arrays.sort(
sortedValues,
new Comparator<byte[]>() {
@Override
public int compare(byte[] a, byte[] b) {
return Arrays.compareUnsigned(a, 0, BYTES, b, 0, BYTES);
}
});
Arrays.sort(sortedValues, (a, b) -> Arrays.compareUnsigned(a, 0, BYTES, b, 0, BYTES));

final BytesRef encoded = new BytesRef(new byte[BYTES]);

Expand Down
11 changes: 1 addition & 10 deletions lucene/core/src/java/org/apache/lucene/index/MultiTermsEnum.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@

import java.io.IOException;
import java.util.Arrays;
import java.util.Comparator;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.BytesRefBuilder;
Expand All @@ -32,14 +31,6 @@
*/
public final class MultiTermsEnum extends BaseTermsEnum {

private static final Comparator<TermsEnumWithSlice> INDEX_COMPARATOR =
new Comparator<TermsEnumWithSlice>() {
@Override
public int compare(TermsEnumWithSlice o1, TermsEnumWithSlice o2) {
return o1.subIndex - o2.subIndex;
}
};

private final TermMergeQueue queue;
// all of our subs (one per sub-reader)
private final TermsEnumWithSlice[] subs;
Expand Down Expand Up @@ -338,7 +329,7 @@ public PostingsEnum postings(PostingsEnum reuse, int flags) throws IOException {

int upto = 0;

ArrayUtil.timSort(top, 0, numTop, INDEX_COMPARATOR);
ArrayUtil.timSort(top, 0, numTop, (o1, o2) -> o1.subIndex - o2.subIndex);

for (int i = 0; i < numTop; i++) {

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ final class DisjunctionScoreBlockBoundaryPropagator {
throw new RuntimeException(e);
}
})
.thenComparing(Comparator.comparing(s -> s.iterator().cost()));
.thenComparing(s -> s.iterator().cost());

private final Scorer[] scorers;
private final float[] maxScores;
Expand Down
32 changes: 11 additions & 21 deletions lucene/core/src/java/org/apache/lucene/search/QueryRescorer.java
Original file line number Diff line number Diff line change
Expand Up @@ -50,14 +50,7 @@ public TopDocs rescore(IndexSearcher searcher, TopDocs firstPassTopDocs, int top
throws IOException {
ScoreDoc[] hits = firstPassTopDocs.scoreDocs.clone();

Arrays.sort(
hits,
new Comparator<ScoreDoc>() {
@Override
public int compare(ScoreDoc a, ScoreDoc b) {
return a.doc - b.doc;
}
});
Arrays.sort(hits, (a, b) -> a.doc - b.doc);

List<LeafReaderContext> leaves = searcher.getIndexReader().leaves();

Expand Down Expand Up @@ -111,19 +104,16 @@ public int compare(ScoreDoc a, ScoreDoc b) {
}

Comparator<ScoreDoc> sortDocComparator =
new Comparator<ScoreDoc>() {
@Override
public int compare(ScoreDoc a, ScoreDoc b) {
// Sort by score descending, then docID ascending:
if (a.score > b.score) {
return -1;
} else if (a.score < b.score) {
return 1;
} else {
// This subtraction can't overflow int
// because docIDs are >= 0:
return a.doc - b.doc;
}
(a, b) -> {
// Sort by score descending, then docID ascending:
if (a.score > b.score) {
return -1;
} else if (a.score < b.score) {
return 1;
} else {
// This subtraction can't overflow int
// because docIDs are >= 0:
return a.doc - b.doc;
}
};

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,6 @@
package org.apache.lucene.search;

import java.io.IOException;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;
Expand Down Expand Up @@ -160,7 +159,7 @@ public boolean collect(BytesRef bytes) throws IOException {

final B b = getTopLevelBuilder();
final ScoreTerm[] scoreTerms = stQueue.toArray(new ScoreTerm[stQueue.size()]);
ArrayUtil.timSort(scoreTerms, scoreTermSortByTermComp);
ArrayUtil.timSort(scoreTerms, (st1, st2) -> st1.bytes.get().compareTo(st2.bytes.get()));

for (final ScoreTerm st : scoreTerms) {
final Term term = new Term(query.field, st.bytes.toBytesRef());
Expand Down Expand Up @@ -188,14 +187,6 @@ public boolean equals(Object obj) {
return true;
}

private static final Comparator<ScoreTerm> scoreTermSortByTermComp =
new Comparator<ScoreTerm>() {
@Override
public int compare(ScoreTerm st1, ScoreTerm st2) {
return st1.bytes.get().compareTo(st2.bytes.get());
}
};

static final class ScoreTerm implements Comparable<ScoreTerm> {
public final BytesRefBuilder bytes = new BytesRefBuilder();
public float boost;
Expand Down
10 changes: 1 addition & 9 deletions lucene/core/src/java/org/apache/lucene/util/Accountables.java
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;
import java.util.Map;

Expand Down Expand Up @@ -98,14 +97,7 @@ public static Collection<Accountable> namedAccountables(
for (Map.Entry<?, ? extends Accountable> kv : in.entrySet()) {
resources.add(namedAccountable(prefix + " '" + kv.getKey() + "'", kv.getValue()));
}
Collections.sort(
resources,
new Comparator<Accountable>() {
@Override
public int compare(Accountable o1, Accountable o2) {
return o1.toString().compareTo(o2.toString());
}
});
resources.sort((o1, o2) -> o1.toString().compareTo(o2.toString()));
return Collections.unmodifiableList(resources);
}

Expand Down
Loading

0 comments on commit 71ca6b4

Please sign in to comment.