Replace List<Integer> by IntArrayList and List<Long> by LongArrayList. #13406

Merged
5 commits merged on May 25, 2024
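
Summary of the pattern applied across these diffs: boxed java.util.List<Integer> (and List<Long>) collections become the primitive-backed org.apache.lucene.util.hppc.IntArrayList (and LongArrayList), and iteration goes through a cursor object instead of auto-unboxed elements. A minimal before/after sketch, not taken from the patch, assuming only the IntArrayList/IntCursor API that the diffs below actually use:

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

class BoxedVsPrimitive {
  // Before: each add() boxes the int into an Integer, and the loop unboxes again.
  static int sumBoxed() {
    List<Integer> ids = new ArrayList<>();
    ids.add(3);
    ids.add(7);
    int sum = 0;
    for (int id : ids) {
      sum += id;
    }
    return sum;
  }

  // After: values live in a plain int[] inside IntArrayList; the iterator hands out
  // an IntCursor whose 'value' field is the primitive int.
  static int sumPrimitive() {
    IntArrayList ids = new IntArrayList();
    ids.add(3);
    ids.add(7);
    int sum = 0;
    for (IntCursor c : ids) {
      sum += c.value;
    }
    return sum;
  }
}

Where an expected size is known, the diffs also pass it to the constructor (for example new IntArrayList(bufferSize / 10) in ReversePathHierarchyTokenizer), the same capacity hint ArrayList offers.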
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
@@ -352,6 +352,8 @@ Optimizations

* GITHUB#13400: Replace Set<Integer> by IntHashSet and Set<Long> by LongHashSet. (Bruno Roustant)

* GITHUB#13406: Replace List<Integer> by IntArrayList and List<Long> by LongArrayList. (Bruno Roustant)

Bug Fixes
---------------------

@@ -19,7 +19,6 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -29,6 +28,8 @@
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.RollingBuffer;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

/**
* Converts an incoming graph token stream, such as one from {@link SynonymGraphFilter}, into a flat
@@ -90,7 +91,7 @@ public void reset() {
* of nodes we've seen but can't yet output because they are not frozen.
*/
private static final class OutputNode implements RollingBuffer.Resettable {
private final List<Integer> inputNodes = new ArrayList<>();
private final IntArrayList inputNodes = new IntArrayList();

/** Node ID for this output, or -1 if we haven't been assigned yet. */
int node = -1;
@@ -175,8 +176,8 @@ private boolean releaseBufferedToken() {
}

int maxToNode = -1;
for (int inputNodeID : output.inputNodes) {
InputNode inputNode = inputNodes.get(inputNodeID);
for (IntCursor inputNodeID : output.inputNodes) {
InputNode inputNode = inputNodes.get(inputNodeID.value);
assert inputNode.outputNode == outputFrom;
maxToNode = Math.max(maxToNode, inputNode.maxToNode);
}
@@ -280,7 +281,7 @@ private void freeBefore(OutputNode output) {
Related tests testShingledGap, testShingledGapWithHoles
*/
outputFrom++;
int freeBefore = Collections.min(output.inputNodes);
int freeBefore = output.inputNodes.stream().min().orElseThrow();
// This will catch a node being freed early if it is input to the next output.
// Could a freed early node be input to a later output?
assert outputNodes.get(outputFrom).inputNodes.stream().noneMatch(n -> freeBefore > n)
@@ -349,7 +350,7 @@ public boolean incrementToken() throws IOException {
* The last node in the alt path didn't arrive to remove this reference.
*/
assert inputNodes.get(inputFrom).tokens.isEmpty() : "about to remove non empty edge";
outSrc.inputNodes.remove(Integer.valueOf(inputFrom));
outSrc.inputNodes.removeElement(inputFrom);
src.outputNode = -1;
int prevEndOffset = outSrc.endOffset;

@@ -381,8 +382,7 @@ public boolean incrementToken() throws IOException {

if (outputEndNode > dest.outputNode) {
if (dest.outputNode != -1) {
boolean removed =
outputNodes.get(dest.outputNode).inputNodes.remove(Integer.valueOf(inputTo));
boolean removed = outputNodes.get(dest.outputNode).inputNodes.removeElement(inputTo);
assert removed;
}
// System.out.println(" increase output node: " + dest.outputNode + " vs " +
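
One behavioral detail in the FlattenGraphFilter hunks above: with a boxed list, remove(int) and remove(Object) are different overloads, which is why the old code had to call remove(Integer.valueOf(inputFrom)). IntArrayList.removeElement(int), as used in the new code, is remove-by-value and returns whether a matching element was found. The freeBefore hunk similarly swaps Collections.min(list) for the primitive stream's min(). A small illustrative sketch, not from the patch:

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.hppc.IntArrayList;

class RemoveByValue {
  static void demo() {
    List<Integer> boxed = new ArrayList<>(List.of(5, 2, 9));
    boxed.remove(Integer.valueOf(9)); // removes the value 9
    boxed.remove(0);                  // removes the element at index 0

    IntArrayList primitive = new IntArrayList();
    primitive.add(5);
    primitive.add(2);
    primitive.add(9);
    boolean removed = primitive.removeElement(9); // remove-by-value, as in the patch
    assert removed;
    int min = primitive.stream().min().orElseThrow(); // 2, no boxing in the comparison
    System.out.println(min);
  }
}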
@@ -60,6 +60,8 @@
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

/** In-memory structure for the dictionary (.dic) and affix (.aff) data of a hunspell dictionary. */
public class Dictionary {
@@ -330,8 +332,8 @@ static IntsRef nextArc(
*/
private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, FlagEnumerator flags)
throws IOException, ParseException {
TreeMap<String, List<Integer>> prefixes = new TreeMap<>();
TreeMap<String, List<Integer>> suffixes = new TreeMap<>();
TreeMap<String, IntArrayList> prefixes = new TreeMap<>();
TreeMap<String, IntArrayList> suffixes = new TreeMap<>();
Set<Character> prefixContFlags = new HashSet<>();
Set<Character> suffixContFlags = new HashSet<>();
Map<String, Integer> seenPatterns = new HashMap<>();
@@ -643,17 +645,17 @@ private Breaks parseBreaks(LineNumberReader reader, String line)
return new Breaks(starting, ending, middle);
}

private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOException {
private FST<IntsRef> affixFST(TreeMap<String, IntArrayList> affixes) throws IOException {
IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
FSTCompiler<IntsRef> fstCompiler =
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).build();
IntsRefBuilder scratch = new IntsRefBuilder();
for (Map.Entry<String, List<Integer>> entry : affixes.entrySet()) {
for (Map.Entry<String, IntArrayList> entry : affixes.entrySet()) {
Util.toUTF32(entry.getKey(), scratch);
List<Integer> entries = entry.getValue();
IntArrayList entries = entry.getValue();
IntsRef output = new IntsRef(entries.size());
for (Integer c : entries) {
output.ints[output.length++] = c;
for (IntCursor c : entries) {
output.ints[output.length++] = c.value;
}
fstCompiler.add(scratch.get(), output);
}
@@ -670,7 +672,7 @@ private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOE
* @throws IOException Can be thrown while reading the rule
*/
private void parseAffix(
TreeMap<String, List<Integer>> affixes,
TreeMap<String, IntArrayList> affixes,
Set<Character> secondStageFlags,
String header,
LineNumberReader reader,
@@ -792,7 +794,7 @@ private void parseAffix(
affixArg = new StringBuilder(affixArg).reverse().toString();
}

affixes.computeIfAbsent(affixArg, __ -> new ArrayList<>()).add(currentAffix);
affixes.computeIfAbsent(affixArg, __ -> new IntArrayList()).add(currentAffix);
currentAffix++;
}
}
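
The affixFST and parseAffix changes keep the previous shape: affix IDs are grouped per affix string via computeIfAbsent, then copied into the FST output in insertion order. A reduced sketch of just that grouping and copy step, standalone and without the FST plumbing:

import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

class AffixGrouping {
  static void demo() {
    TreeMap<String, IntArrayList> affixes = new TreeMap<>();
    int currentAffix = 0;
    for (String affixArg : new String[] {"ung", "en", "ung"}) {
      // Same idiom as parseAffix: one growable primitive list per affix key.
      affixes.computeIfAbsent(affixArg, __ -> new IntArrayList()).add(currentAffix++);
    }
    for (Map.Entry<String, IntArrayList> entry : affixes.entrySet()) {
      IntArrayList ids = entry.getValue();
      int[] output = new int[ids.size()];
      int upto = 0;
      for (IntCursor c : ids) { // cursor-based copy, as in affixFST
        output[upto++] = c.value;
      }
      System.out.println(entry.getKey() + " -> " + Arrays.toString(output));
    }
  }
}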
@@ -28,6 +28,7 @@
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.hppc.IntArrayList;

/**
* A data structure for memory-efficient word storage and fast lookup/enumeration. Each dictionary
@@ -262,7 +263,7 @@ static class Builder {

private final IntsRefBuilder currentOrds = new IntsRefBuilder();
private final List<char[]> group = new ArrayList<>();
private final List<Integer> morphDataIDs = new ArrayList<>();
private final IntArrayList morphDataIDs = new IntArrayList();
private String currentEntry = null;
private final int wordCount;
private final double hashFactor;
@@ -17,13 +17,13 @@
package org.apache.lucene.analysis.morph;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.List;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;
import org.apache.lucene.util.hppc.IntIntHashMap;

/** {@link Viterbi} subclass for n-best path calculation. */
@@ -75,12 +75,12 @@ protected final void backtraceNBest(final Position endPosData, final boolean use
if (VERBOSE) {
System.out.printf("DEBUG: 1-BEST COST: %d\n", bestCost);
}
for (int node : lattice.bestPathNodeList()) {
registerNode(node, fragment);
for (IntCursor node : lattice.bestPathNodeList()) {
registerNode(node.value, fragment);
}

for (int n = 2; ; ++n) {
List<Integer> nbest = lattice.nBestNodeList(n);
IntArrayList nbest = lattice.nBestNodeList(n);
if (nbest.isEmpty()) {
break;
}
@@ -91,8 +91,8 @@ protected final void backtraceNBest(final Position endPosData, final boolean use
if (bestCost + nBestCost < cost) {
break;
}
for (int node : nbest) {
registerNode(node, fragment);
for (IntCursor node : nbest) {
registerNode(node.value, fragment);
}
}
if (VERBOSE) {
@@ -558,8 +558,8 @@ void markSameSpanNode(int refNode, int value) {
}
}

List<Integer> bestPathNodeList() {
List<Integer> list = new ArrayList<>();
IntArrayList bestPathNodeList() {
IntArrayList list = new IntArrayList();
for (int node = nodeRightNode[0]; node != 1; node = nodeRightNode[node]) {
list.add(node);
markSameSpanNode(node, 1);
@@ -571,8 +571,8 @@ private int cost(int node) {
return nodeLeftCost[node] + nodeWordCost[node] + nodeRightCost[node];
}

List<Integer> nBestNodeList(int N) {
List<Integer> list = new ArrayList<>();
IntArrayList nBestNodeList(int N) {
IntArrayList list = new IntArrayList();
int leastCost = Integer.MAX_VALUE;
int leastLeft = -1;
int leastRight = -1;
@@ -591,8 +591,8 @@ List<Integer> nBestNodeList(int N) {
}
}
}
for (int node : list) {
markSameSpanNode(node, N);
for (IntCursor node : list) {
markSameSpanNode(node.value, N);
}
return list;
}
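
backtraceNBest consumes these lists directly: it registers the 1-best path, then keeps asking for the n-th best node list until an empty IntArrayList comes back. A condensed sketch of that consumer loop; the Lattice interface and register method here are hypothetical stand-ins, not the real classes:

import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

class NBestConsumer {
  // Hypothetical stand-in for the lattice above.
  interface Lattice {
    IntArrayList nBestNodeList(int n);
  }

  static void consume(Lattice lattice) {
    for (int n = 2; ; ++n) {
      IntArrayList nbest = lattice.nBestNodeList(n);
      if (nbest.isEmpty()) {
        break; // no further paths at this rank
      }
      for (IntCursor node : nbest) {
        register(node.value);
      }
    }
  }

  static void register(int node) {
    System.out.println("register node " + node);
  }
}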
@@ -17,14 +17,13 @@
package org.apache.lucene.analysis.path;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.IgnoreRandomChains;
import org.apache.lucene.util.hppc.IntArrayList;

/**
* Tokenizer for domain-like hierarchies.
@@ -99,7 +98,7 @@ public ReversePathHierarchyTokenizer(
this.skip = skip;
resultToken = new StringBuilder(bufferSize);
resultTokenBuffer = new char[bufferSize];
delimiterPositions = new ArrayList<>(bufferSize / 10);
delimiterPositions = new IntArrayList(bufferSize / 10);
}

private static final int DEFAULT_BUFFER_SIZE = 1024;
@@ -120,7 +119,7 @@ public ReversePathHierarchyTokenizer(
private int skipped = 0;
private StringBuilder resultToken;

private List<Integer> delimiterPositions;
private IntArrayList delimiterPositions;
private int delimitersCount = -1;
private char[] resultTokenBuffer;

@@ -19,7 +19,6 @@
import java.io.IOException;
import java.io.Reader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Set;
@@ -38,6 +37,7 @@
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntHashSet;

/**
Expand Down Expand Up @@ -91,7 +91,7 @@ public Builder(boolean dedup) {
private static class MapEntry {
boolean includeOrig;
// we could sort for better sharing ultimately, but it could confuse people
ArrayList<Integer> ords = new ArrayList<>();
IntArrayList ords = new IntArrayList();
}

/**
@@ -81,8 +81,9 @@ private UserDictionary(List<String> entries) throws IOException {

String lastToken = null;
List<int[]> _segmentations = new ArrayList<>(entries.size());
List<Short> _rightIds = new ArrayList<>(entries.size());
short[] rightIds = new short[entries.size()];
long ord = 0;
int entryIndex = 0;
for (String entry : entries) {
String[] splits = entry.split("\\s+");
String token = splits[0];
@@ -92,12 +93,12 @@
char lastChar = entry.charAt(entry.length() - 1);
if (charDef.isHangul(lastChar)) {
if (charDef.hasCoda(lastChar)) {
_rightIds.add(RIGHT_ID_T);
rightIds[entryIndex++] = RIGHT_ID_T;
} else {
_rightIds.add(RIGHT_ID_F);
rightIds[entryIndex++] = RIGHT_ID_F;
}
} else {
_rightIds.add(RIGHT_ID);
rightIds[entryIndex++] = RIGHT_ID;
}

if (splits.length == 1) {
@@ -133,10 +134,7 @@ private UserDictionary(List<String> entries) throws IOException {
this.fst =
new TokenInfoFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
int[][] segmentations = _segmentations.toArray(new int[_segmentations.size()][]);
short[] rightIds = new short[_rightIds.size()];
for (int i = 0; i < _rightIds.size(); i++) {
rightIds[i] = _rightIds.get(i);
}
assert entryIndex == rightIds.length;
this.morphAtts = new UserMorphData(segmentations, rightIds);
}
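
For the right IDs this file goes one step further than a primitive list: the entry count is known up front, so the boxed List<Short> plus the trailing copy loop becomes a preallocated short[] filled in place through entryIndex, with an assert that every slot was written. A minimal sketch of the same pattern, with a hypothetical classifier in place of the Hangul coda checks:

import java.util.List;

class RightIdsSketch {
  static short[] buildRightIds(List<String> entries) {
    short[] rightIds = new short[entries.size()]; // size known up front, no List<Short>
    int entryIndex = 0;
    for (String entry : entries) {
      // Hypothetical classification standing in for charDef.isHangul / hasCoda above.
      rightIds[entryIndex++] = entry.isEmpty() ? (short) 0 : (short) 1;
    }
    assert entryIndex == rightIds.length;
    return rightIds;
  }
}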

@@ -19,6 +19,7 @@
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.cn.smart.Utility;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntObjectHashMap;
import org.apache.lucene.util.hppc.ObjectCursor;

@@ -197,19 +198,18 @@ public List<SegToken> getShortPath() {
int preNode, lastNode;
lastNode = path.size() - 1;
current = lastNode;
List<Integer> rpath = new ArrayList<>();
IntArrayList rpath = new IntArrayList();
List<SegToken> resultPath = new ArrayList<>();

rpath.add(current);
while (current != 0) {
PathNode currentPathNode = path.get(current);
preNode = currentPathNode.preNode;
rpath.add(Integer.valueOf(preNode));
rpath.add(preNode);
current = preNode;
}
for (int j = rpath.size() - 1; j >= 0; j--) {
Integer idInteger = rpath.get(j);
int id = idInteger.intValue();
int id = rpath.get(j);
SegToken t = segTokenList.get(id);
resultPath.add(t);
}
@@ -27,6 +27,7 @@
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
import org.apache.lucene.util.hppc.IntArrayList;

/**
* Perf run configuration properties.
@@ -338,15 +339,15 @@ private int[] propToIntArray(String s) {
return new int[] {Integer.parseInt(s)};
}

ArrayList<Integer> a = new ArrayList<>();
IntArrayList a = new IntArrayList();
StringTokenizer st = new StringTokenizer(s, ":");
while (st.hasMoreTokens()) {
String t = st.nextToken();
a.add(Integer.valueOf(t));
a.add(Integer.parseInt(t));
}
int[] res = new int[a.size()];
for (int i = 0; i < a.size(); i++) {
res[i] = a.get(i).intValue();
res[i] = a.get(i);
}
return res;
}
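
Besides the list type, the parse loop above now calls Integer.parseInt rather than Integer.valueOf, so the value is never boxed on the way in, and get(i) hands back a primitive on the way out. A standalone sketch of the same colon-separated parse, with a hypothetical method name:

import java.util.StringTokenizer;
import org.apache.lucene.util.hppc.IntArrayList;

class ColonSeparatedInts {
  // parse("10:20:30") returns {10, 20, 30}.
  static int[] parse(String s) {
    IntArrayList a = new IntArrayList();
    StringTokenizer st = new StringTokenizer(s, ":");
    while (st.hasMoreTokens()) {
      a.add(Integer.parseInt(st.nextToken())); // straight to int, no boxing
    }
    int[] res = new int[a.size()];
    for (int i = 0; i < a.size(); i++) {
      res[i] = a.get(i); // get(int) returns the primitive int
    }
    return res;
  }
}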