
Commit a8def94
Replace List<Integer> by IntArrayList and List<Long> by LongArrayList. (#13406)
bruno-roustant committed May 27, 2024
1 parent 7f722d0 commit a8def94
Showing 39 changed files with 1,996 additions and 146 deletions.
lucene/CHANGES.txt: 2 additions & 0 deletions
@@ -128,6 +128,8 @@ Optimizations

* GITHUB#13400: Replace Set<Integer> by IntHashSet and Set<Long> by LongHashSet. (Bruno Roustant)

+* GITHUB#13406: Replace List<Integer> by IntArrayList and List<Long> by LongArrayList. (Bruno Roustant)

Bug Fixes
---------------------

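The whole commit follows one pattern: swap a boxed java.util list for the primitive-backed list in Lucene's hppc fork (org.apache.lucene.util.hppc), which stores values in a plain int[] so add/get work on primitives and iteration yields IntCursor objects instead of boxed Integers. A minimal sketch of the before/after shape (the class and method names here are hypothetical, not code from the commit):

    import org.apache.lucene.util.hppc.IntArrayList;
    import org.apache.lucene.util.hppc.IntCursor;

    class BoxingMigrationSketch {           // hypothetical example class
      void demo() {
        // Before: List<Integer> ids = new ArrayList<>(); — every add() allocated an Integer.
        IntArrayList ids = new IntArrayList();
        ids.add(42);                        // stores a primitive int, no boxing
        int first = ids.get(0);             // primitive read, no unboxing
        for (IntCursor c : ids) {           // cursor iteration; the element is c.value
          System.out.println(c.value + " " + first);
        }
      }
    }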
@@ -19,7 +19,6 @@

import java.io.IOException;
import java.util.ArrayList;
-import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -29,6 +28,8 @@
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.RollingBuffer;
+import org.apache.lucene.util.hppc.IntArrayList;
+import org.apache.lucene.util.hppc.IntCursor;

/**
* Converts an incoming graph token stream, such as one from {@link SynonymGraphFilter}, into a flat
@@ -90,7 +91,7 @@ public void reset() {
* "frontier" of nodes we've seen but can't yet output because they are not frozen.
*/
private static final class OutputNode implements RollingBuffer.Resettable {
-private final List<Integer> inputNodes = new ArrayList<>();
+private final IntArrayList inputNodes = new IntArrayList();

/** Node ID for this output, or -1 if we haven't been assigned yet. */
int node = -1;
@@ -175,8 +176,8 @@ private boolean releaseBufferedToken() {
}

int maxToNode = -1;
-for (int inputNodeID : output.inputNodes) {
-  InputNode inputNode = inputNodes.get(inputNodeID);
+for (IntCursor inputNodeID : output.inputNodes) {
+  InputNode inputNode = inputNodes.get(inputNodeID.value);
assert inputNode.outputNode == outputFrom;
maxToNode = Math.max(maxToNode, inputNode.maxToNode);
}
@@ -280,7 +281,7 @@ private void freeBefore(OutputNode output) {
Related tests testShingledGap, testShingledGapWithHoles
*/
outputFrom++;
-int freeBefore = Collections.min(output.inputNodes);
+int freeBefore = output.inputNodes.stream().min().orElseThrow();
// This will catch a node being freed early if it is input to the next output.
// Could a freed early node be input to a later output?
assert outputNodes.get(outputFrom).inputNodes.stream().filter(n -> freeBefore > n).count() == 0
@@ -349,7 +350,7 @@ public boolean incrementToken() throws IOException {
* The last node in the alt path didn't arrive to remove this reference.
*/
assert inputNodes.get(inputFrom).tokens.isEmpty() : "about to remove non empty edge";
-outSrc.inputNodes.remove(Integer.valueOf(inputFrom));
+outSrc.inputNodes.removeElement(inputFrom);
src.outputNode = -1;
int prevEndOffset = outSrc.endOffset;

@@ -381,8 +382,7 @@ public boolean incrementToken() throws IOException {

if (outputEndNode > dest.outputNode) {
if (dest.outputNode != -1) {
-boolean removed =
-    outputNodes.get(dest.outputNode).inputNodes.remove(Integer.valueOf(inputTo));
+boolean removed = outputNodes.get(dest.outputNode).inputNodes.removeElement(inputTo);
assert removed;
}
// System.out.println(" increase output node: " + dest.outputNode + " vs " +
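Two details in the FlattenGraphFilter hunks above are worth spelling out. Collections.min only works on boxed collections, so the new code takes the minimum via the list's primitive stream() instead. And removeElement sidesteps a classic List<Integer> trap: on a boxed list, remove(int) removes by index while remove(Object) removes by value, hence the old remove(Integer.valueOf(...)) dance. A sketch, assuming the fork's removeElement matches upstream HPPC (remove the first occurrence of the value, returning whether it was found):

    import org.apache.lucene.util.hppc.IntArrayList;

    class RemoveByValueSketch {                   // hypothetical example class
      static void demo() {
        IntArrayList nodes = new IntArrayList();
        nodes.add(5);
        nodes.add(7);
        nodes.add(9);
        boolean removed = nodes.removeElement(7); // removes the value 7 -> [5, 9]
        assert removed;
        // With List<Integer>, the same intent required remove(Integer.valueOf(7));
        // remove(7) would have targeted index 7 and thrown IndexOutOfBoundsException.
      }
    }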
@@ -60,6 +60,8 @@
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.hppc.IntArrayList;
+import org.apache.lucene.util.hppc.IntCursor;

/** In-memory structure for the dictionary (.dic) and affix (.aff) data of a hunspell dictionary. */
public class Dictionary {
@@ -330,8 +332,8 @@ static IntsRef nextArc(
*/
private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, FlagEnumerator flags)
throws IOException, ParseException {
-TreeMap<String, List<Integer>> prefixes = new TreeMap<>();
-TreeMap<String, List<Integer>> suffixes = new TreeMap<>();
+TreeMap<String, IntArrayList> prefixes = new TreeMap<>();
+TreeMap<String, IntArrayList> suffixes = new TreeMap<>();
Set<Character> prefixContFlags = new HashSet<>();
Set<Character> suffixContFlags = new HashSet<>();
Map<String, Integer> seenPatterns = new HashMap<>();
@@ -643,17 +645,17 @@ private Breaks parseBreaks(LineNumberReader reader, String line)
return new Breaks(starting, ending, middle);
}

-private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOException {
+private FST<IntsRef> affixFST(TreeMap<String, IntArrayList> affixes) throws IOException {
IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
FSTCompiler<IntsRef> fstCompiler =
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).build();
IntsRefBuilder scratch = new IntsRefBuilder();
-for (Map.Entry<String, List<Integer>> entry : affixes.entrySet()) {
+for (Map.Entry<String, IntArrayList> entry : affixes.entrySet()) {
Util.toUTF32(entry.getKey(), scratch);
-List<Integer> entries = entry.getValue();
+IntArrayList entries = entry.getValue();
IntsRef output = new IntsRef(entries.size());
-for (Integer c : entries) {
-  output.ints[output.length++] = c;
+for (IntCursor c : entries) {
+  output.ints[output.length++] = c.value;
}
fstCompiler.add(scratch.get(), output);
}
@@ -670,7 +672,7 @@ private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOException
* @throws IOException Can be thrown while reading the rule
*/
private void parseAffix(
-    TreeMap<String, List<Integer>> affixes,
+    TreeMap<String, IntArrayList> affixes,
Set<Character> secondStageFlags,
String header,
LineNumberReader reader,
Expand Down Expand Up @@ -792,7 +794,7 @@ private void parseAffix(
affixArg = new StringBuilder(affixArg).reverse().toString();
}

-affixes.computeIfAbsent(affixArg, __ -> new ArrayList<>()).add(currentAffix);
+affixes.computeIfAbsent(affixArg, __ -> new IntArrayList()).add(currentAffix);
currentAffix++;
}
}
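In the Dictionary hunks, only the value type of the TreeMap changes; the map-of-lists grouping idiom stays exactly the same. A sketch of that idiom with hypothetical names:

    import java.util.TreeMap;
    import org.apache.lucene.util.hppc.IntArrayList;

    class AffixGroupingSketch {                   // hypothetical example class
      private final TreeMap<String, IntArrayList> affixes = new TreeMap<>();

      void record(String affixArg, int affixId) {
        // Group primitive ids per key; the lambda allocates the list on first use only.
        affixes.computeIfAbsent(affixArg, k -> new IntArrayList()).add(affixId);
      }
    }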
@@ -29,6 +29,7 @@
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.IntSequenceOutputs;
+import org.apache.lucene.util.hppc.IntArrayList;

/**
* A data structure for memory-efficient word storage and fast lookup/enumeration. Each dictionary
@@ -262,7 +263,7 @@ static class Builder {

private final IntsRefBuilder currentOrds = new IntsRefBuilder();
private final List<char[]> group = new ArrayList<>();
-private final List<Integer> morphDataIDs = new ArrayList<>();
+private final IntArrayList morphDataIDs = new IntArrayList();
private String currentEntry = null;
private final int wordCount;
private final double hashFactor;
@@ -17,14 +17,13 @@
package org.apache.lucene.analysis.path;

import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.IgnoreRandomChains;
+import org.apache.lucene.util.hppc.IntArrayList;

/**
* Tokenizer for domain-like hierarchies.
@@ -99,7 +98,7 @@ public ReversePathHierarchyTokenizer(
this.skip = skip;
resultToken = new StringBuilder(bufferSize);
resultTokenBuffer = new char[bufferSize];
-delimiterPositions = new ArrayList<>(bufferSize / 10);
+delimiterPositions = new IntArrayList(bufferSize / 10);
}

private static final int DEFAULT_BUFFER_SIZE = 1024;
@@ -119,7 +118,7 @@ public ReversePathHierarchyTokenizer(
private int skipped = 0;
private StringBuilder resultToken;

-private List<Integer> delimiterPositions;
+private IntArrayList delimiterPositions;
private int delimitersCount = -1;
private char[] resultTokenBuffer;

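As the tokenizer's constructor above shows, IntArrayList accepts an expected-element count just like ArrayList, so the old capacity hint carries over unchanged. A sketch (the class and constant below are hypothetical):

    import org.apache.lucene.util.hppc.IntArrayList;

    class PresizingSketch {                            // hypothetical example class
      static final int DEFAULT_BUFFER_SIZE = 1024;     // mirrors the tokenizer's default
      // The capacity hint pre-sizes the backing int[] and avoids growth re-allocations
      // while delimiter positions are collected during tokenization.
      final IntArrayList delimiterPositions = new IntArrayList(DEFAULT_BUFFER_SIZE / 10);
    }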
@@ -19,7 +19,6 @@
import java.io.IOException;
import java.io.Reader;
import java.text.ParseException;
-import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Set;
@@ -38,6 +37,7 @@
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.Util;
+import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntHashSet;

/**
@@ -91,7 +91,7 @@ public Builder(boolean dedup) {
private static class MapEntry {
boolean includeOrig;
// we could sort for better sharing ultimately, but it could confuse people
-ArrayList<Integer> ords = new ArrayList<>();
+IntArrayList ords = new IntArrayList();
}

/**
@@ -88,8 +88,9 @@ private UserDictionary(List<String> entries) throws IOException {

String lastToken = null;
List<int[]> segmentations = new ArrayList<>(entries.size());
-List<Short> rightIds = new ArrayList<>(entries.size());
+short[] rightIds = new short[entries.size()];
long ord = 0;
+int entryIndex = 0;
for (String entry : entries) {
String[] splits = entry.split("\\s+");
String token = splits[0];
@@ -99,12 +100,12 @@
char lastChar = entry.charAt(entry.length() - 1);
if (charDef.isHangul(lastChar)) {
if (charDef.hasCoda(lastChar)) {
-rightIds.add(RIGHT_ID_T);
+rightIds[entryIndex++] = RIGHT_ID_T;
} else {
-rightIds.add(RIGHT_ID_F);
+rightIds[entryIndex++] = RIGHT_ID_F;
}
} else {
-rightIds.add(RIGHT_ID);
+rightIds[entryIndex++] = RIGHT_ID;
}

if (splits.length == 1) {
@@ -140,10 +141,8 @@ private UserDictionary(List<String> entries) throws IOException {
this.fst =
new TokenInfoFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
this.segmentations = segmentations.toArray(new int[segmentations.size()][]);
-this.rightIds = new short[rightIds.size()];
-for (int i = 0; i < rightIds.size(); i++) {
-  this.rightIds[i] = rightIds.get(i);
-}
+assert entryIndex == rightIds.length;
+this.rightIds = rightIds;
}

public TokenInfoFST getFST() {
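In UserDictionary the element count is known up front (one right id per entry), so the boxed List<Short> plus the final copy loop collapses into a plain short[] and a write index. The shape of that pattern, with a hypothetical classify helper standing in for the Hangul coda logic:

    import java.util.List;

    class RightIdsSketch {                            // hypothetical example class
      short[] buildRightIds(List<String> entries) {
        short[] rightIds = new short[entries.size()]; // exact size known in advance
        int entryIndex = 0;
        for (String entry : entries) {
          rightIds[entryIndex++] = classify(entry);   // hypothetical per-entry right id
        }
        assert entryIndex == rightIds.length;         // every entry wrote exactly one slot
        return rightIds;                              // used directly, no copy loop
      }

      private short classify(String entry) {          // hypothetical stand-in
        return 0;
      }
    }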
@@ -19,6 +19,7 @@
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.cn.smart.Utility;
+import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntObjectHashMap;
import org.apache.lucene.util.hppc.ObjectCursor;

Expand Down Expand Up @@ -197,19 +198,18 @@ public List<SegToken> getShortPath() {
int preNode, lastNode;
lastNode = path.size() - 1;
current = lastNode;
-List<Integer> rpath = new ArrayList<>();
+IntArrayList rpath = new IntArrayList();
List<SegToken> resultPath = new ArrayList<>();

rpath.add(current);
while (current != 0) {
PathNode currentPathNode = path.get(current);
preNode = currentPathNode.preNode;
-rpath.add(Integer.valueOf(preNode));
+rpath.add(preNode);
current = preNode;
}
for (int j = rpath.size() - 1; j >= 0; j--) {
-Integer idInteger = rpath.get(j);
-int id = idInteger.intValue();
+int id = rpath.get(j);
SegToken t = segTokenList.get(id);
resultPath.add(t);
}
@@ -27,6 +27,7 @@
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
+import org.apache.lucene.util.hppc.IntArrayList;

/**
* Perf run configuration properties.
@@ -338,15 +339,15 @@ private int[] propToIntArray(String s) {
return new int[] {Integer.parseInt(s)};
}

-ArrayList<Integer> a = new ArrayList<>();
+IntArrayList a = new IntArrayList();
StringTokenizer st = new StringTokenizer(s, ":");
while (st.hasMoreTokens()) {
String t = st.nextToken();
-a.add(Integer.valueOf(t));
+a.add(Integer.parseInt(t));
}
int[] res = new int[a.size()];
for (int i = 0; i < a.size(); i++) {
-res[i] = a.get(i).intValue();
+res[i] = a.get(i);
}
return res;
}
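propToIntArray above still copies element by element into the result array. Assuming the fork also exposes HPPC's toArray(), which returns a fresh int[], the tail of the method could shrink to a single call; a sketch with a hypothetical method name:

    import org.apache.lucene.util.hppc.IntArrayList;

    class ParseIntsSketch {                            // hypothetical example class
      // Assumes IntArrayList.toArray() exists in the fork, as in upstream HPPC.
      static int[] parseColonSeparatedInts(String s) {
        IntArrayList a = new IntArrayList();
        for (String t : s.split(":")) {
          a.add(Integer.parseInt(t));
        }
        return a.toArray();                            // one copy into a fresh int[]
      }
    }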
@@ -28,6 +28,8 @@
import org.apache.lucene.util.automaton.Automaton;
import org.apache.lucene.util.automaton.Operations;
import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.hppc.IntArrayList;
+import org.apache.lucene.util.hppc.IntCursor;
import org.apache.lucene.util.hppc.IntIntHashMap;

/** Converts an Automaton into a TokenStream. */
@@ -49,7 +51,7 @@ public static TokenStream toTokenStream(Automaton automaton) {
throw new IllegalArgumentException("Automaton must be finite");
}

-List<List<Integer>> positionNodes = new ArrayList<>();
+List<IntArrayList> positionNodes = new ArrayList<>();

Transition[][] transitions = automaton.getSortedTransitions();

@@ -77,7 +79,7 @@ public static TokenStream toTokenStream(Automaton automaton) {
}
}
if (positionNodes.size() == currState.pos) {
-List<Integer> posIncs = new ArrayList<>();
+IntArrayList posIncs = new IntArrayList();
posIncs.add(currState.id);
positionNodes.add(posIncs);
} else {
@@ -93,10 +95,10 @@
}

List<List<EdgeToken>> edgesByLayer = new ArrayList<>();
-for (List<Integer> layer : positionNodes) {
+for (IntArrayList layer : positionNodes) {
List<EdgeToken> edges = new ArrayList<>();
-for (int state : layer) {
-  for (Transition t : transitions[state]) {
+for (IntCursor state : layer) {
+  for (Transition t : transitions[state.value]) {
// each edge in the token stream can only be on value, though a transition takes a range.
for (int val = t.min; val <= t.max; val++) {
int destLayer = idToPos.get(t.dest);
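The nested loops above rely on one property of cursor iteration: an IntCursor is a reusable view that the iterator mutates in place (as in upstream HPPC), so the element must be read from state.value inside the loop and the cursor object itself should never be stored. A sketch:

    import org.apache.lucene.util.hppc.IntArrayList;
    import org.apache.lucene.util.hppc.IntCursor;

    class CursorReuseSketch {                          // hypothetical example class
      static int sum(IntArrayList layer) {
        int total = 0;
        for (IntCursor state : layer) {
          total += state.value;  // correct: copy the primitive out of the cursor
          // Do not store the cursor itself: the iterator reuses one IntCursor
          // instance (as in upstream HPPC), so saved references would all end
          // up seeing the last element's value.
        }
        return total;
      }
    }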
@@ -59,6 +59,7 @@
import org.apache.lucene.util.hnsw.CloseableRandomVectorScorerSupplier;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
import org.apache.lucene.util.hnsw.RandomVectorScorerSupplier;
+import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.quantization.QuantizedByteVectorValues;
import org.apache.lucene.util.quantization.QuantizedVectorsReader;
import org.apache.lucene.util.quantization.ScalarQuantizer;
@@ -540,7 +541,7 @@ private ScalarQuantizedCloseableRandomVectorScorerSupplier mergeOneFieldToIndex(
}

static ScalarQuantizer mergeQuantiles(
-List<ScalarQuantizer> quantizationStates, List<Integer> segmentSizes, byte bits) {
+List<ScalarQuantizer> quantizationStates, IntArrayList segmentSizes, byte bits) {
assert quantizationStates.size() == segmentSizes.size();
if (quantizationStates.isEmpty()) {
return null;
@@ -633,7 +634,7 @@ public static ScalarQuantizer mergeAndRecalculateQuantiles(
throws IOException {
assert fieldInfo.getVectorEncoding().equals(VectorEncoding.FLOAT32);
List<ScalarQuantizer> quantizationStates = new ArrayList<>(mergeState.liveDocs.length);
-List<Integer> segmentSizes = new ArrayList<>(mergeState.liveDocs.length);
+IntArrayList segmentSizes = new IntArrayList(mergeState.liveDocs.length);
for (int i = 0; i < mergeState.liveDocs.length; i++) {
FloatVectorValues fvv;
if (mergeState.knnVectorsReaders[i] != null
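mergeQuantiles keeps two parallel sequences, one quantizer state and one vector count per segment, and the assert above guards that they stay in lock step; IntArrayList offers the same size()/get()/isEmpty() shape the boxed list had, so the invariant reads unchanged. A sketch of a caller maintaining that invariant (all names below are hypothetical):

    import java.util.ArrayList;
    import java.util.List;
    import org.apache.lucene.util.hppc.IntArrayList;

    class ParallelSegmentsSketch {                       // hypothetical example class
      void collect(int numSegments) {
        List<Object> states = new ArrayList<>(numSegments);        // one state per segment
        IntArrayList segmentSizes = new IntArrayList(numSegments); // one size per segment
        for (int i = 0; i < numSegments; i++) {
          states.add(readState(i));                      // hypothetical reader
          segmentSizes.add(readSize(i));                 // hypothetical reader
        }
        assert states.size() == segmentSizes.size();     // the invariant the method checks
      }

      Object readState(int i) { return new Object(); }   // hypothetical stand-in
      int readSize(int i) { return i; }                  // hypothetical stand-in
    }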
(Diff view truncated: the remaining changed files are not shown.)
