Replace List<Integer> by IntArrayList and List<Long> by LongArrayList. #13406

Merged
5 commits merged on May 25, 2024
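
Summary of the pattern applied across these diffs: boxed java.util.List<Integer> (and List<Long>) collections become the primitive-backed org.apache.lucene.util.hppc.IntArrayList (and LongArrayList), and iteration goes through a cursor object instead of auto-unboxed elements. A minimal before/after sketch, not taken from the patch, assuming only the IntArrayList/IntCursor API that the diffs below actually use:

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

class BoxedVsPrimitive {
  // Before: each add() boxes the int into an Integer, and the loop unboxes again.
  static int sumBoxed() {
    List<Integer> ids = new ArrayList<>();
    ids.add(3);
    ids.add(7);
    int sum = 0;
    for (int id : ids) {
      sum += id;
    }
    return sum;
  }

  // After: values live in a plain int[] inside IntArrayList; the iterator hands out
  // an IntCursor whose 'value' field is the primitive int.
  static int sumPrimitive() {
    IntArrayList ids = new IntArrayList();
    ids.add(3);
    ids.add(7);
    int sum = 0;
    for (IntCursor c : ids) {
      sum += c.value;
    }
    return sum;
  }
}

Where an expected size is known, the diffs also pass it to the constructor (for example new IntArrayList(bufferSize / 10) in ReversePathHierarchyTokenizer), the same capacity hint ArrayList offers.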
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
@@ -352,6 +352,8 @@ Optimizations

* GITHUB#13400: Replace Set<Integer> by IntHashSet and Set<Long> by LongHashSet. (Bruno Roustant)

* GITHUB#13406: Replace List<Integer> by IntArrayList and List<Long> by LongArrayList. (Bruno Roustant)

Bug Fixes
---------------------

@@ -19,7 +19,6 @@

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
@@ -29,6 +28,8 @@
import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
import org.apache.lucene.util.AttributeSource;
import org.apache.lucene.util.RollingBuffer;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

/**
* Converts an incoming graph token stream, such as one from {@link SynonymGraphFilter}, into a flat
@@ -90,7 +91,7 @@ public void reset() {
* of nodes we've seen but can't yet output because they are not frozen.
*/
private static final class OutputNode implements RollingBuffer.Resettable {
private final List<Integer> inputNodes = new ArrayList<>();
private final IntArrayList inputNodes = new IntArrayList();

/** Node ID for this output, or -1 if we haven't been assigned yet. */
int node = -1;
@@ -175,8 +176,8 @@ private boolean releaseBufferedToken() {
}

int maxToNode = -1;
for (int inputNodeID : output.inputNodes) {
InputNode inputNode = inputNodes.get(inputNodeID);
for (IntCursor inputNodeID : output.inputNodes) {
InputNode inputNode = inputNodes.get(inputNodeID.value);
assert inputNode.outputNode == outputFrom;
maxToNode = Math.max(maxToNode, inputNode.maxToNode);
}
@@ -280,7 +281,7 @@ private void freeBefore(OutputNode output) {
Related tests testShingledGap, testShingledGapWithHoles
*/
outputFrom++;
int freeBefore = Collections.min(output.inputNodes);
int freeBefore = output.inputNodes.stream().min().orElseThrow();
// This will catch a node being freed early if it is input to the next output.
// Could a freed early node be input to a later output?
assert outputNodes.get(outputFrom).inputNodes.stream().noneMatch(n -> freeBefore > n)
@@ -349,7 +350,7 @@ public boolean incrementToken() throws IOException {
* The last node in the alt path didn't arrive to remove this reference.
*/
assert inputNodes.get(inputFrom).tokens.isEmpty() : "about to remove non empty edge";
outSrc.inputNodes.remove(Integer.valueOf(inputFrom));
outSrc.inputNodes.removeElement(inputFrom);
src.outputNode = -1;
int prevEndOffset = outSrc.endOffset;

@@ -381,8 +382,7 @@ public boolean incrementToken() throws IOException {

if (outputEndNode > dest.outputNode) {
if (dest.outputNode != -1) {
boolean removed =
outputNodes.get(dest.outputNode).inputNodes.remove(Integer.valueOf(inputTo));
boolean removed = outputNodes.get(dest.outputNode).inputNodes.removeElement(inputTo);
assert removed;
}
// System.out.println(" increase output node: " + dest.outputNode + " vs " +
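
One behavioral detail in the FlattenGraphFilter hunks above: with a boxed list, remove(int) and remove(Object) are different overloads, which is why the old code had to call remove(Integer.valueOf(inputFrom)). IntArrayList.removeElement(int), as used in the new code, is remove-by-value and returns whether a matching element was found. The freeBefore hunk similarly swaps Collections.min(list) for the primitive stream's min(). A small illustrative sketch, not from the patch:

import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.hppc.IntArrayList;

class RemoveByValue {
  static void demo() {
    List<Integer> boxed = new ArrayList<>(List.of(5, 2, 9));
    boxed.remove(Integer.valueOf(9)); // removes the value 9
    boxed.remove(0);                  // removes the element at index 0

    IntArrayList primitive = new IntArrayList();
    primitive.add(5);
    primitive.add(2);
    primitive.add(9);
    boolean removed = primitive.removeElement(9); // remove-by-value, as in the patch
    assert removed;
    int min = primitive.stream().min().orElseThrow(); // 2, no boxing in the comparison
    System.out.println(min);
  }
}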
@@ -60,6 +60,8 @@
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

/** In-memory structure for the dictionary (.dic) and affix (.aff) data of a hunspell dictionary. */
public class Dictionary {
@@ -330,8 +332,8 @@ static IntsRef nextArc(
*/
private void readAffixFile(InputStream affixStream, CharsetDecoder decoder, FlagEnumerator flags)
throws IOException, ParseException {
TreeMap<String, List<Integer>> prefixes = new TreeMap<>();
TreeMap<String, List<Integer>> suffixes = new TreeMap<>();
TreeMap<String, IntArrayList> prefixes = new TreeMap<>();
TreeMap<String, IntArrayList> suffixes = new TreeMap<>();
Set<Character> prefixContFlags = new HashSet<>();
Set<Character> suffixContFlags = new HashSet<>();
Map<String, Integer> seenPatterns = new HashMap<>();
@@ -643,17 +645,17 @@ private Breaks parseBreaks(LineNumberReader reader, String line)
return new Breaks(starting, ending, middle);
}

private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOException {
private FST<IntsRef> affixFST(TreeMap<String, IntArrayList> affixes) throws IOException {
IntSequenceOutputs outputs = IntSequenceOutputs.getSingleton();
FSTCompiler<IntsRef> fstCompiler =
new FSTCompiler.Builder<>(FST.INPUT_TYPE.BYTE4, outputs).build();
IntsRefBuilder scratch = new IntsRefBuilder();
for (Map.Entry<String, List<Integer>> entry : affixes.entrySet()) {
for (Map.Entry<String, IntArrayList> entry : affixes.entrySet()) {
Util.toUTF32(entry.getKey(), scratch);
List<Integer> entries = entry.getValue();
IntArrayList entries = entry.getValue();
IntsRef output = new IntsRef(entries.size());
for (Integer c : entries) {
output.ints[output.length++] = c;
for (IntCursor c : entries) {
output.ints[output.length++] = c.value;
}
fstCompiler.add(scratch.get(), output);
}
@@ -670,7 +672,7 @@ private FST<IntsRef> affixFST(TreeMap<String, List<Integer>> affixes) throws IOE
* @throws IOException Can be thrown while reading the rule
*/
private void parseAffix(
TreeMap<String, List<Integer>> affixes,
TreeMap<String, IntArrayList> affixes,
Set<Character> secondStageFlags,
String header,
LineNumberReader reader,
@@ -792,7 +794,7 @@ private void parseAffix(
affixArg = new StringBuilder(affixArg).reverse().toString();
}

affixes.computeIfAbsent(affixArg, __ -> new ArrayList<>()).add(currentAffix);
affixes.computeIfAbsent(affixArg, __ -> new IntArrayList()).add(currentAffix);
currentAffix++;
}
}
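
The affixFST and parseAffix changes keep the previous shape: affix IDs are grouped per affix string via computeIfAbsent, then copied into the FST output in insertion order. A reduced sketch of just that grouping and copy step, standalone and without the FST plumbing:

import java.util.Arrays;
import java.util.Map;
import java.util.TreeMap;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

class AffixGrouping {
  static void demo() {
    TreeMap<String, IntArrayList> affixes = new TreeMap<>();
    int currentAffix = 0;
    for (String affixArg : new String[] {"ung", "en", "ung"}) {
      // Same idiom as parseAffix: one growable primitive list per affix key.
      affixes.computeIfAbsent(affixArg, __ -> new IntArrayList()).add(currentAffix++);
    }
    for (Map.Entry<String, IntArrayList> entry : affixes.entrySet()) {
      IntArrayList ids = entry.getValue();
      int[] output = new int[ids.size()];
      int upto = 0;
      for (IntCursor c : ids) { // cursor-based copy, as in affixFST
        output[upto++] = c.value;
      }
      System.out.println(entry.getKey() + " -> " + Arrays.toString(output));
    }
  }
}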
@@ -28,6 +28,7 @@
import org.apache.lucene.util.IntsRef;
import org.apache.lucene.util.IntsRefBuilder;
import org.apache.lucene.util.fst.IntSequenceOutputs;
import org.apache.lucene.util.hppc.IntArrayList;

/**
* A data structure for memory-efficient word storage and fast lookup/enumeration. Each dictionary
@@ -262,7 +263,7 @@ static class Builder {

private final IntsRefBuilder currentOrds = new IntsRefBuilder();
private final List<char[]> group = new ArrayList<>();
private final List<Integer> morphDataIDs = new ArrayList<>();
private final IntArrayList morphDataIDs = new IntArrayList();
private String currentEntry = null;
private final int wordCount;
private final double hashFactor;
@@ -17,13 +17,13 @@
package org.apache.lucene.analysis.morph;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.EnumMap;
import java.util.List;
import org.apache.lucene.util.ArrayUtil;
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;
import org.apache.lucene.util.hppc.IntIntHashMap;

/** {@link Viterbi} subclass for n-best path calculation. */
@@ -75,12 +75,12 @@ protected final void backtraceNBest(final Position endPosData, final boolean use
if (VERBOSE) {
System.out.printf("DEBUG: 1-BEST COST: %d\n", bestCost);
}
for (int node : lattice.bestPathNodeList()) {
registerNode(node, fragment);
for (IntCursor node : lattice.bestPathNodeList()) {
registerNode(node.value, fragment);
}

for (int n = 2; ; ++n) {
List<Integer> nbest = lattice.nBestNodeList(n);
IntArrayList nbest = lattice.nBestNodeList(n);
if (nbest.isEmpty()) {
break;
}
@@ -91,8 +91,8 @@ protected final void backtraceNBest(final Position endPosData, final boolean use
if (bestCost + nBestCost < cost) {
break;
}
for (int node : nbest) {
registerNode(node, fragment);
for (IntCursor node : nbest) {
registerNode(node.value, fragment);
}
}
if (VERBOSE) {
@@ -558,8 +558,8 @@ void markSameSpanNode(int refNode, int value) {
}
}

List<Integer> bestPathNodeList() {
List<Integer> list = new ArrayList<>();
IntArrayList bestPathNodeList() {
IntArrayList list = new IntArrayList();
for (int node = nodeRightNode[0]; node != 1; node = nodeRightNode[node]) {
list.add(node);
markSameSpanNode(node, 1);
@@ -571,8 +571,8 @@ private int cost(int node) {
return nodeLeftCost[node] + nodeWordCost[node] + nodeRightCost[node];
}

List<Integer> nBestNodeList(int N) {
List<Integer> list = new ArrayList<>();
IntArrayList nBestNodeList(int N) {
IntArrayList list = new IntArrayList();
int leastCost = Integer.MAX_VALUE;
int leastLeft = -1;
int leastRight = -1;
@@ -591,8 +591,8 @@ List<Integer> nBestNodeList(int N) {
}
}
}
for (int node : list) {
markSameSpanNode(node, N);
for (IntCursor node : list) {
markSameSpanNode(node.value, N);
}
return list;
}
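
backtraceNBest consumes these lists directly: it registers the 1-best path, then keeps asking for the n-th best node list until an empty IntArrayList comes back. A condensed sketch of that consumer loop; the Lattice interface and register method here are hypothetical stand-ins, not the real classes:

import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntCursor;

class NBestConsumer {
  // Hypothetical stand-in for the lattice above.
  interface Lattice {
    IntArrayList nBestNodeList(int n);
  }

  static void consume(Lattice lattice) {
    for (int n = 2; ; ++n) {
      IntArrayList nbest = lattice.nBestNodeList(n);
      if (nbest.isEmpty()) {
        break; // no further paths at this rank
      }
      for (IntCursor node : nbest) {
        register(node.value);
      }
    }
  }

  static void register(int node) {
    System.out.println("register node " + node);
  }
}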
@@ -17,14 +17,13 @@
package org.apache.lucene.analysis.path;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.Tokenizer;
import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
import org.apache.lucene.util.AttributeFactory;
import org.apache.lucene.util.IgnoreRandomChains;
import org.apache.lucene.util.hppc.IntArrayList;

/**
* Tokenizer for domain-like hierarchies.
@@ -99,7 +98,7 @@ public ReversePathHierarchyTokenizer(
this.skip = skip;
resultToken = new StringBuilder(bufferSize);
resultTokenBuffer = new char[bufferSize];
delimiterPositions = new ArrayList<>(bufferSize / 10);
delimiterPositions = new IntArrayList(bufferSize / 10);
}

private static final int DEFAULT_BUFFER_SIZE = 1024;
@@ -120,7 +119,7 @@ public ReversePathHierarchyTokenizer(
private int skipped = 0;
private StringBuilder resultToken;

private List<Integer> delimiterPositions;
private IntArrayList delimiterPositions;
private int delimitersCount = -1;
private char[] resultTokenBuffer;

@@ -19,7 +19,6 @@
import java.io.IOException;
import java.io.Reader;
import java.text.ParseException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Set;
@@ -38,6 +37,7 @@
import org.apache.lucene.util.fst.FST;
import org.apache.lucene.util.fst.FSTCompiler;
import org.apache.lucene.util.fst.Util;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntHashSet;

/**
Expand Down Expand Up @@ -91,7 +91,7 @@ public Builder(boolean dedup) {
private static class MapEntry {
boolean includeOrig;
// we could sort for better sharing ultimately, but it could confuse people
ArrayList<Integer> ords = new ArrayList<>();
IntArrayList ords = new IntArrayList();
}

/**
@@ -81,8 +81,9 @@ private UserDictionary(List<String> entries) throws IOException {

String lastToken = null;
List<int[]> _segmentations = new ArrayList<>(entries.size());
List<Short> _rightIds = new ArrayList<>(entries.size());
short[] rightIds = new short[entries.size()];
long ord = 0;
int entryIndex = 0;
for (String entry : entries) {
String[] splits = entry.split("\\s+");
String token = splits[0];
@@ -92,12 +93,12 @@
char lastChar = entry.charAt(entry.length() - 1);
if (charDef.isHangul(lastChar)) {
if (charDef.hasCoda(lastChar)) {
_rightIds.add(RIGHT_ID_T);
rightIds[entryIndex++] = RIGHT_ID_T;
} else {
_rightIds.add(RIGHT_ID_F);
rightIds[entryIndex++] = RIGHT_ID_F;
}
} else {
_rightIds.add(RIGHT_ID);
rightIds[entryIndex++] = RIGHT_ID;
}

if (splits.length == 1) {
@@ -133,10 +134,7 @@ private UserDictionary(List<String> entries) throws IOException {
this.fst =
new TokenInfoFST(FST.fromFSTReader(fstCompiler.compile(), fstCompiler.getFSTReader()));
int[][] segmentations = _segmentations.toArray(new int[_segmentations.size()][]);
short[] rightIds = new short[_rightIds.size()];
for (int i = 0; i < _rightIds.size(); i++) {
rightIds[i] = _rightIds.get(i);
}
assert entryIndex == rightIds.length;
this.morphAtts = new UserMorphData(segmentations, rightIds);
}
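
For the right IDs this file goes one step further than a primitive list: the entry count is known up front, so the boxed List<Short> plus the trailing copy loop becomes a preallocated short[] filled in place through entryIndex, with an assert that every slot was written. A minimal sketch of the same pattern, with a hypothetical classifier in place of the Hangul coda checks:

import java.util.List;

class RightIdsSketch {
  static short[] buildRightIds(List<String> entries) {
    short[] rightIds = new short[entries.size()]; // size known up front, no List<Short>
    int entryIndex = 0;
    for (String entry : entries) {
      // Hypothetical classification standing in for charDef.isHangul / hasCoda above.
      rightIds[entryIndex++] = entry.isEmpty() ? (short) 0 : (short) 1;
    }
    assert entryIndex == rightIds.length;
    return rightIds;
  }
}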

@@ -19,6 +19,7 @@
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.analysis.cn.smart.Utility;
import org.apache.lucene.util.hppc.IntArrayList;
import org.apache.lucene.util.hppc.IntObjectHashMap;
import org.apache.lucene.util.hppc.ObjectCursor;

@@ -197,19 +198,18 @@ public List<SegToken> getShortPath() {
int preNode, lastNode;
lastNode = path.size() - 1;
current = lastNode;
List<Integer> rpath = new ArrayList<>();
IntArrayList rpath = new IntArrayList();
List<SegToken> resultPath = new ArrayList<>();

rpath.add(current);
while (current != 0) {
PathNode currentPathNode = path.get(current);
preNode = currentPathNode.preNode;
rpath.add(Integer.valueOf(preNode));
rpath.add(preNode);
current = preNode;
}
for (int j = rpath.size() - 1; j >= 0; j--) {
Integer idInteger = rpath.get(j);
int id = idInteger.intValue();
int id = rpath.get(j);
SegToken t = segTokenList.get(id);
resultPath.add(t);
}
@@ -27,6 +27,7 @@
import java.util.Map;
import java.util.Properties;
import java.util.StringTokenizer;
import org.apache.lucene.util.hppc.IntArrayList;

/**
* Perf run configuration properties.
@@ -338,15 +339,15 @@ private int[] propToIntArray(String s) {
return new int[] {Integer.parseInt(s)};
}

ArrayList<Integer> a = new ArrayList<>();
IntArrayList a = new IntArrayList();
StringTokenizer st = new StringTokenizer(s, ":");
while (st.hasMoreTokens()) {
String t = st.nextToken();
a.add(Integer.valueOf(t));
a.add(Integer.parseInt(t));
}
int[] res = new int[a.size()];
for (int i = 0; i < a.size(); i++) {
res[i] = a.get(i).intValue();
res[i] = a.get(i);
}
return res;
}
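
Besides the list type, the parse loop above now calls Integer.parseInt rather than Integer.valueOf, so the value is never boxed on the way in, and get(i) hands back a primitive on the way out. A standalone sketch of the same colon-separated parse, with a hypothetical method name:

import java.util.StringTokenizer;
import org.apache.lucene.util.hppc.IntArrayList;

class ColonSeparatedInts {
  // parse("10:20:30") returns {10, 20, 30}.
  static int[] parse(String s) {
    IntArrayList a = new IntArrayList();
    StringTokenizer st = new StringTokenizer(s, ":");
    while (st.hasMoreTokens()) {
      a.add(Integer.parseInt(st.nextToken())); // straight to int, no boxing
    }
    int[] res = new int[a.size()];
    for (int i = 0; i < a.size(); i++) {
      res[i] = a.get(i); // get(int) returns the primitive int
    }
    return res;
  }
}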