Replace List<Integer> by IntArrayList and List<Long> by LongArrayList. (#13406)
bruno-roustant authored May 25, 2024
1 parent 90e07f6 commit 444d4e7
Showing 40 changed files with 2,008 additions and 159 deletions.
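Before the per-file hunks, a minimal, hedged sketch of the pattern this commit applies throughout: boxed java.util lists of Integer (and Long) give way to the int/long-specialized lists in org.apache.lucene.util.hppc. The sketch uses only calls that appear in the hunks below; the class name and values are illustrative, and LongArrayList follows the same shape for List<Long>.

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

public class BoxedVsPrimitiveListSketch {
  public static void main(String[] args) {
    // Before: each add/get boxes or unboxes an Integer object.
    List<Integer> boxed = new ArrayList<>();
    boxed.add(42);
    int fromBoxed = boxed.get(0);

    // After: the values live in a plain int[] inside IntArrayList.
    IntArrayList ints = new IntArrayList();
    ints.add(42);
    int fromPrimitive = ints.get(0);

    // Iteration hands out an IntCursor whose .value field holds the element,
    // which is why the enhanced-for loops in the hunks change shape.
    int sum = 0;
    for (IntCursor c : ints) {
      sum += c.value;
    }
    System.out.println(fromBoxed + " " + fromPrimitive + " " + sum);
  }
}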
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
@@ -354,6 +354,8 @@ Optimizations

* GITHUB#13400: Replace Set<Integer> by IntHashSet and Set<Long> by LongHashSet. (Bruno Roustant)

* GITHUB#13406: Replace List<Integer> by IntArrayList and List<Long> by LongArrayList. (Bruno Roustant)

Bug Fixes
---------------------

@@ -19,7 +19,6 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -29,6 +28,8 @@
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.RollingBuffer;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

/**
* Converts an incoming graph token stream, such as one from {@link SynonymGraphFilter}, into a flat
@@ -90,7 +91,7 @@ public void reset() {
* of nodes we've seen but can't yet output because they are not frozen.
*/
private static final class OutputNode implements RollingBuffer.Resettable {
private final List<Integer> inputNodes = new ArrayList<>();
private final IntArrayList inputNodes = new IntArrayList();

/** Node ID for this output, or -1 if we haven't been assigned yet. */
int node = -1;
@@ -175,8 +176,8 @@ private boolean releaseBufferedToken() {
}

int maxToNode = -1;
for (int inputNodeID : output.inputNodes) {
InputNode inputNode = inputNodes.get(inputNodeID);
for (IntCursor inputNodeID : output.inputNodes) {
InputNode inputNode = inputNodes.get(inputNodeID.value);
assert inputNode.outputNode == outputFrom;
maxToNode = Math.max(maxToNode, inputNode.maxToNode);
}
@@ -280,7 +281,7 @@ private void freeBefore(OutputNode output) {
Related tests testShingledGap, testShingledGapWithHoles
*/
outputFrom++;
int freeBefore = Collections.min(output.inputNodes);
int freeBefore = output.inputNodes.stream().min().orElseThrow();
// This will catch a node being freed early if it is input to the next output.
// Could a freed early node be input to a later output?
assert outputNodes.get(outputFrom).inputNodes.stream().noneMatch(n -> freeBefore > n)
@@ -349,7 +350,7 @@ public boolean incrementToken() throws IOException {
* The last node in the alt path didn't arrive to remove this reference.
*/
assert inputNodes.get(inputFrom).tokens.isEmpty() : "about to remove non empty edge";
outSrc.inputNodes.remove(Integer.valueOf(inputFrom));
outSrc.inputNodes.removeElement(inputFrom);
src.outputNode = -1;
int prevEndOffset = outSrc.endOffset;

@@ -381,8 +382,7 @@ public boolean incrementToken() throws IOException {

if (outputEndNode > dest.outputNode) {
if (dest.outputNode != -1) {
boolean removed =
outputNodes.get(dest.outputNode).inputNodes.remove(Integer.valueOf(inputTo));
boolean removed = outputNodes.get(dest.outputNode).inputNodes.removeElement(inputTo);
assert removed;
}
// System.out.println(" increase output node: " + dest.outputNode + " vs " +
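One detail worth pausing on in the graph-flattening hunks above: on a boxed List<Integer>, remove(int) removes by index, so the old code wrapped the value in Integer.valueOf to force remove-by-value, and it used Collections.min for the smallest element. IntArrayList names the by-value removal removeElement, and the min()/noneMatch() calls in the hunk suggest its stream() yields a primitive IntStream. A hedged sketch, using only calls shown in the hunks (the class name is illustrative):

import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.util.hppc.IntArrayList;

public class RemoveAndMinSketch {
  public static void main(String[] args) {
    // Boxed list: Integer.valueOf(3) forces the remove(Object) overload.
    List<Integer> boxed = new ArrayList<>(List.of(7, 3, 9));
    boxed.remove(Integer.valueOf(3));            // removes the value 3, not index 3
    int boxedMin = Collections.min(boxed);       // 7

    // Primitive list: removeElement is unambiguous and reports whether it removed anything.
    IntArrayList ints = new IntArrayList();
    ints.add(7);
    ints.add(3);
    ints.add(9);
    boolean removed = ints.removeElement(3);     // remove-by-value, no boxing
    int min = ints.stream().min().orElseThrow(); // OptionalInt from a primitive stream
    System.out.println(removed + " " + boxedMin + " " + min);
  }
}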
@@ -60,6 +60,8 @@
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

/** In-memory structure for the dictionary (.dic) and affix (.aff) data of a hunspell dictionary. */
public class Dictionary {
@@ -330,8 +332,8 @@ static IntsRef nextArc(
*/
private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, FlagEnumerator flags)
throws IOException, ParseException {
TreeMap<String, List<Integer>> prefixes = new TreeMap<>();
TreeMap<String, List<Integer>> suffixes = new TreeMap<>();
TreeMap<String, IntArrayList> prefixes = new TreeMap<>();
TreeMap<String, IntArrayList> suffixes = new TreeMap<>();
Set<Character> prefixContFlags = new HashSet<>();
Set<Character> suffixContFlags = new HashSet<>();
Map<String, Integer> seenPatterns = new HashMap<>();
@@ -643,17 +645,17 @@ private Breaks parseBreaks(LineNumberReader reader, String line)
return new Breaks(starting, ending, middle);
}

private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOException {
private FST<IntsRef> affixFST(TreeMap<String, IntArrayList> affixes) throws IOException {
IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
FSTCompiler<IntsRef> fstCompiler =
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).build();
IntsRefBuilder scratch = new IntsRefBuilder();
for (Map.Entry<String, List<Integer>> entry : affixes.entrySet()) {
for (Map.Entry<String, IntArrayList> entry : affixes.entrySet()) {
Util.toUTF32(entry.getKey(), scratch);
List<Integer> entries = entry.getValue();
IntArrayList entries = entry.getValue();
IntsRef output = new IntsRef(entries.size());
for (Integer c : entries) {
output.ints[output.length++] = c;
for (IntCursor c : entries) {
output.ints[output.length++] = c.value;
}
fstCompiler.add(scratch.get(), output);
}
@@ -670,7 +672,7 @@ private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOE
* @throws IOException Can be thrown while reading the rule
*/
private void parseAffix(
TreeMap<String, List<Integer>> affixes,
TreeMap<String, IntArrayList> affixes,
Set<Character> secondStageFlags,
String header,
LineNumberReader reader,
@@ -792,7 +794,7 @@ private void parseAffix(
affixArg = new StringBuilder(affixArg).reverse().toString();
}

affixes.computeIfAbsent(affixArg, __ -> new ArrayList<>()).add(currentAffix);
affixes.computeIfAbsent(affixArg, __ -> new IntArrayList()).add(currentAffix);
currentAffix++;
}
}
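The hunspell Dictionary hunks above also show the map-of-lists variant of the same change: affix ids are grouped per key in a TreeMap<String, IntArrayList> via computeIfAbsent, then read back with size() and an IntCursor when the FST output is built. A condensed, hedged sketch of that pattern (keys and ids are made up):

import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

public class AffixGroupingSketch {
  public static void main(String[] args) {
    // Group int ids under string keys without boxing the ids.
    TreeMap<String, IntArrayList> affixes = new TreeMap<>();
    affixes.computeIfAbsent("pre", k -> new IntArrayList()).add(0);
    affixes.computeIfAbsent("pre", k -> new IntArrayList()).add(2);
    affixes.computeIfAbsent("un", k -> new IntArrayList()).add(1);

    // Reading a group back mirrors the affixFST hunk: size() for the target
    // array, an IntCursor per element.
    for (Map.Entry<String, IntArrayList> entry : affixes.entrySet()) {
      int[] ids = new int[entry.getValue().size()];
      int upto = 0;
      for (IntCursor c : entry.getValue()) {
        ids[upto++] = c.value;
      }
      System.out.println(entry.getKey() + " -> " + Arrays.toString(ids));
    }
  }
}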
@@ -28,6 +28,7 @@
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.hppc.IntArrayList;

/**
* A data structure for memory-efficient word storage and fast lookup/enumeration. Each dictionary
@@ -262,7 +263,7 @@ static class Builder {

private final IntsRefBuilder currentOrds = new IntsRefBuilder();
private final List<char[]> group = new ArrayList<>();
private final List<Integer> morphDataIDs = new ArrayList<>();
private final IntArrayList morphDataIDs = new IntArrayList();
private String currentEntry = null;
private final int wordCount;
private final double hashFactor;
@@ -17,13 +17,13 @@
package org.apache.lucene.analysis.morph;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.List;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;
import org.apache.lucene.util.hppc.IntIntHashMap;

/** {@link Viterbi} subclass for n-best path calculation. */
@@ -75,12 +75,12 @@ protected final void backtraceNBest(final Position endPosData, final boolean use
if (VERBOSE) {
System.out.printf("DEBUG: 1-BEST COST: %d\n", bestCost);
}
for (int node : lattice.bestPathNodeList()) {
registerNode(node, fragment);
for (IntCursor node : lattice.bestPathNodeList()) {
registerNode(node.value, fragment);
}

for (int n = 2; ; ++n) {
List<Integer> nbest = lattice.nBestNodeList(n);
IntArrayList nbest = lattice.nBestNodeList(n);
if (nbest.isEmpty()) {
break;
}
@@ -91,8 +91,8 @@
if (bestCost + nBestCost < cost) {
break;
}
for (int node : nbest) {
registerNode(node, fragment);
for (IntCursor node : nbest) {
registerNode(node.value, fragment);
}
}
if (VERBOSE) {
@@ -558,8 +558,8 @@ void markSameSpanNode(int refNode, int value) {
}
}

List<Integer> bestPathNodeList() {
List<Integer> list = new ArrayList<>();
IntArrayList bestPathNodeList() {
IntArrayList list = new IntArrayList();
for (int node = nodeRightNode[0]; node != 1; node = nodeRightNode[node]) {
list.add(node);
markSameSpanNode(node, 1);
@@ -571,8 +571,8 @@ private int cost(int node) {
return nodeLeftCost[node] + nodeWordCost[node] + nodeRightCost[node];
}

List<Integer> nBestNodeList(int N) {
List<Integer> list = new ArrayList<>();
IntArrayList nBestNodeList(int N) {
IntArrayList list = new IntArrayList();
int leastCost = Integer.MAX_VALUE;
int leastLeft = -1;
int leastRight = -1;
@@ -591,8 +591,8 @@
}
}
}
for (int node : list) {
markSameSpanNode(node, N);
for (IntCursor node : list) {
markSameSpanNode(node.value, N);
}
return list;
}
@@ -17,14 +17,13 @@
package org.apache.lucene.analysis.path;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.IgnoreRandomChains;
import org.apache.lucene.util.hppc.IntArrayList;

/**
* Tokenizer for domain-like hierarchies.
@@ -99,7 +98,7 @@ public ReversePathHierarchyTokenizer(
this.skip = skip;
resultToken = new StringBuilder(bufferSize);
resultTokenBuffer = new char[bufferSize];
delimiterPositions = new ArrayList<>(bufferSize / 10);
delimiterPositions = new IntArrayList(bufferSize / 10);
}

private static final int DEFAULT_BUFFER_SIZE = 1024;
@@ -120,7 +119,7 @@ public ReversePathHierarchyTokenizer(
private int skipped = 0;
private StringBuilder resultToken;

private List<Integer> delimiterPositions;
private IntArrayList delimiterPositions;
private int delimitersCount = -1;
private char[] resultTokenBuffer;

@@ -19,7 +19,6 @@
import java.io.IOException;
import java.io.Reader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Set;
@@ -38,6 +37,7 @@
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntHashSet;

/**
@@ -91,7 +91,7 @@ public Builder(boolean dedup) {
private static class MapEntry {
boolean includeOrig;
// we could sort for better sharing ultimately, but it could confuse people
ArrayList<Integer> ords = new ArrayList<>();
IntArrayList ords = new IntArrayList();
}

/**
@@ -81,8 +81,9 @@ private UserDictionary(List<String> entries) throws IOException {

String lastToken = null;
List<int[]> _segmentations = new ArrayList<>(entries.size());
List<Short> _rightIds = new ArrayList<>(entries.size());
short[] rightIds = new short[entries.size()];
long ord = 0;
int entryIndex = 0;
for (String entry : entries) {
String[] splits = entry.split("\\s+");
String token = splits[0];
@@ -92,12 +93,12 @@
char lastChar = entry.charAt(entry.length() - 1);
if (charDef.isHangul(lastChar)) {
if (charDef.hasCoda(lastChar)) {
_rightIds.add(RIGHT_ID_T);
rightIds[entryIndex++] = RIGHT_ID_T;
} else {
_rightIds.add(RIGHT_ID_F);
rightIds[entryIndex++] = RIGHT_ID_F;
}
} else {
_rightIds.add(RIGHT_ID);
rightIds[entryIndex++] = RIGHT_ID;
}

if (splits.length == 1) {
@@ -133,10 +134,7 @@ private UserDictionary(List<String> entries) throws IOException {
this.fst =
new TokenInfoFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
int[][] segmentations = _segmentations.toArray(new int[_segmentations.size()][]);
short[] rightIds = new short[_rightIds.size()];
for (int i = 0; i < _rightIds.size(); i++) {
rightIds[i] = _rightIds.get(i);
}
assert entryIndex == rightIds.length;
this.morphAtts = new UserMorphData(segmentations, rightIds);
}

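The user-dictionary hunk above takes a slightly different route: the right-id values are shorts rather than ints, and the number of entries is known before the loop, so the boxed List<Short> plus copy loop becomes a short[] sized up front and filled through an index counter, with an assert confirming the counter matched the array length. A generic, hedged sketch of that shape (the classification logic is a placeholder for the real RIGHT_ID_* selection):

import java.util.Arrays;
import java.util.List;

public class KnownSizeFillSketch {
  public static void main(String[] args) {
    List<String> entries = List.of("alpha", "beta", "gamma");

    // Size is known up front, so no growable (boxed) list is needed.
    short[] rightIds = new short[entries.size()];
    int entryIndex = 0;
    for (String entry : entries) {
      // Placeholder classification; the real code picks one of three RIGHT_ID_*
      // constants based on the entry's last character.
      rightIds[entryIndex++] = (short) (entry.length() % 3);
    }
    assert entryIndex == rightIds.length;
    System.out.println(Arrays.toString(rightIds));
  }
}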
@@ -19,6 +19,7 @@
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.cn.smart.Utility;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntObjectHashMap;
import org.apache.lucene.util.hppc.ObjectCursor;

@@ -197,19 +198,18 @@ public List<SegToken> getShortPath() {
int preNode, lastNode;
lastNode = path.size() - 1;
current = lastNode;
List<Integer> rpath = new ArrayList<>();
IntArrayList rpath = new IntArrayList();
List<SegToken> resultPath = new ArrayList<>();

rpath.add(current);
while (current != 0) {
PathNode currentPathNode = path.get(current);
preNode = currentPathNode.preNode;
rpath.add(Integer.valueOf(preNode));
rpath.add(preNode);
current = preNode;
}
for (int j = rpath.size() - 1; j >= 0; j--) {
Integer idInteger = rpath.get(j);
int id = idInteger.intValue();
int id = rpath.get(j);
SegToken t = segTokenList.get(id);
resultPath.add(t);
}
@@ -27,6 +27,7 @@
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
import org.apache.lucene.util.hppc.IntArrayList;

/**
* Perf run configuration properties.
@@ -338,15 +339,15 @@ private int[] propToIntArray(String s) {
return new int[] {Integer.parseInt(s)};
}

ArrayList<Integer> a = new ArrayList<>();
IntArrayList a = new IntArrayList();
StringTokenizer st = new StringTokenizer(s, ":");
while (st.hasMoreTokens()) {
String t = st.nextToken();
a.add(Integer.valueOf(t));
a.add(Integer.parseInt(t));
}
int[] res = new int[a.size()];
for (int i = 0; i < a.size(); i++) {
res[i] = a.get(i).intValue();
res[i] = a.get(i);
}
return res;
}
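A small point from the hunk above: the old code called Integer.valueOf(t) because List<Integer>.add needed an object, while Integer.parseInt(t) returns a primitive int, which is the natural fit once the target is an IntArrayList. A hedged, self-contained version of that conversion (the method and class names are made up; the body mirrors the hunk):

import java.util.Arrays;
import java.util.StringTokenizer;
import org.apache.lucene.util.hppc.IntArrayList;

public class ParseIntsSketch {
  // Hypothetical helper mirroring propToIntArray: "1:2:3" -> {1, 2, 3}.
  static int[] parseColonSeparatedInts(String s) {
    IntArrayList a = new IntArrayList();
    StringTokenizer st = new StringTokenizer(s, ":");
    while (st.hasMoreTokens()) {
      a.add(Integer.parseInt(st.nextToken())); // parseInt returns an int, so nothing is boxed
    }
    int[] res = new int[a.size()];
    for (int i = 0; i < a.size(); i++) {
      res[i] = a.get(i);
    }
    return res;
  }

  public static void main(String[] args) {
    System.out.println(Arrays.toString(parseColonSeparatedInts("10:20:30")));
  }
}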