Replace Map<Integer, Object> by primitive IntObjectHashMap. #13368

Merged: 3 commits, May 18, 2024

Changes from 1 commit
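The diffs below all apply the same pattern: a boxed `Map<Integer, V>` keyed by small integers (field numbers, token offsets, script codes, round numbers) becomes a primitive-keyed `IntObjectHashMap<V>` or `IntIntHashMap` from `org.apache.lucene.util.hppc`, removing `Integer` boxing on every put and get. A minimal before/after sketch of that theme (an editor's illustration, not code from the PR, using only calls that appear in the diffs on this page):

```java
import java.util.Map;
import org.apache.lucene.util.hppc.IntObjectHashMap;

class BoxedVsPrimitive {
  static String before(Map<Integer, String> m, int key) {
    return m.get(key); // key is autoboxed to an Integer for every lookup
  }

  static String after(IntObjectHashMap<String> m, int key) {
    return m.get(key); // primitive key, no allocation; returns null when absent
  }
}
```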
SuggestibleEntryCache.java

@@ -16,12 +16,11 @@
  */
 package org.apache.lucene.analysis.hunspell;

-import java.util.HashMap;
-import java.util.Map;
 import java.util.function.Consumer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.hppc.IntObjectHashMap;

 /**
  * A cache allowing for CPU-cache-friendlier iteration over {@link WordStorage} entries that can be
@@ -35,9 +34,7 @@ class SuggestibleEntryCache {

   private final Section[] sections;

-  private SuggestibleEntryCache(Map<Integer, SectionBuilder> builders) {
-    int maxLength =
-        builders.isEmpty() ? 0 : builders.keySet().stream().max(Integer::compare).orElseThrow();
+  private SuggestibleEntryCache(IntObjectHashMap<SectionBuilder> builders, int maxLength) {
     sections = new Section[maxLength + 1];
     for (int i = 0; i < sections.length; i++) {
       SectionBuilder builder = builders.get(i);
@@ -48,22 +45,33 @@ private SuggestibleEntryCache(Map<Integer, SectionBuilder> builders) {
   static SuggestibleEntryCache buildCache(WordStorage storage) {
     var consumer =
         new Consumer<FlyweightEntry>() {
-          final Map<Integer, SectionBuilder> builders = new HashMap<>();
+          final IntObjectHashMap<SectionBuilder> builders = new IntObjectHashMap<>();
+          int maxLength;

           @Override
           public void accept(FlyweightEntry entry) {
             CharsRef root = entry.root();
             if (root.length > Short.MAX_VALUE) {
               throw new UnsupportedOperationException(
                   "Too long dictionary entry, please report this to dev@lucene.apache.org");
+            } else if (root.length > maxLength) {
+              maxLength = root.length;
             }

-            builders.computeIfAbsent(root.length, __ -> new SectionBuilder()).add(entry);
+            SectionBuilder builder;
+            int index = builders.indexOf(root.length);
+            if (index < 0) {
+              builder = new SectionBuilder();
+              builders.indexInsert(index, root.length, builder);
+            } else {
+              builder = builders.indexGet(index);
+            }
+            builder.add(entry);
           }
         };
     storage.processSuggestibleWords(1, Integer.MAX_VALUE, consumer);

-    return new SuggestibleEntryCache(consumer.builders);
+    return new SuggestibleEntryCache(consumer.builders, consumer.maxLength);
   }

[Inline review comment from the PR author, on the indexOf/indexInsert block above: "This is the only spot where the replacement brings some complexity. It does not seem too complex to me, so I kept it."]
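For reference, the `indexOf`/`indexInsert` pair the author mentions is the hppc-style replacement for `computeIfAbsent`: `indexOf` returns a negative slot index when the key is absent, and handing that same index to `indexInsert` skips the second hash probe that a separate get-then-put would pay. A generic sketch of the idiom (the `getOrCreate` helper is hypothetical, not part of the PR):

```java
import java.util.function.Supplier;
import org.apache.lucene.util.hppc.IntObjectHashMap;

class GetOrCreate {
  static <V> V getOrCreate(IntObjectHashMap<V> map, int key, Supplier<V> factory) {
    int index = map.indexOf(key);
    if (index >= 0) {
      return map.indexGet(index); // key already present, reuse its slot
    }
    V value = factory.get();
    map.indexInsert(index, key, value); // the negative index encodes the free slot
    return value;
  }
}
```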
ViterbiNBest.java

@@ -21,10 +21,10 @@
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumMap;
-import java.util.HashMap;
 import java.util.List;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.hppc.IntIntHashMap;

 /** {@link Viterbi} subclass for n-best path calculation. */
 public abstract class ViterbiNBest<T extends Token, U extends MorphData>
@@ -137,14 +137,14 @@ protected final void fixupPendingList() {
     }

     // offset=>position map
-    HashMap<Integer, Integer> map = new HashMap<>();
+    IntIntHashMap map = new IntIntHashMap();
     for (Token t : pending) {
       map.put(t.getOffset(), 0);
       map.put(t.getOffset() + t.getLength(), 0);
     }

     // Get unique and sorted list of all edge position of tokens.
-    Integer[] offsets = map.keySet().toArray(new Integer[0]);
+    int[] offsets = map.keys().toArray();
     Arrays.sort(offsets);

     // setup all value of map. It specifies N-th position from begin.
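The hunk above also changes the temporary key array from `Integer[]` to `int[]`: `keys()` on the primitive map exposes an `int`-backed container, so `toArray()` yields `int[]` and the sort runs on primitives. The same pattern in isolation (a sketch; the helper and its parameters are illustrative, not Lucene code):

```java
import java.util.Arrays;
import org.apache.lucene.util.hppc.IntIntHashMap;

class EdgePositions {
  static IntIntHashMap positionMap(int[] starts, int[] ends) {
    IntIntHashMap map = new IntIntHashMap();
    for (int i = 0; i < starts.length; i++) {
      map.put(starts[i], 0); // keys double as a set; values are assigned below
      map.put(ends[i], 0);
    }
    int[] offsets = map.keys().toArray(); // primitive int[], no Integer[] boxing
    Arrays.sort(offsets);
    for (int i = 0; i < offsets.length; i++) {
      map.put(offsets[i], i); // N-th position from the beginning, as in the source
    }
    return map;
  }
}
```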
ICUTokenizerFactory.java

@@ -24,14 +24,14 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.lucene.analysis.TokenizerFactory;
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.ResourceLoader;
 import org.apache.lucene.util.ResourceLoaderAware;
+import org.apache.lucene.util.hppc.IntObjectHashMap;

 /**
  * Factory for {@link ICUTokenizer}. Words are broken across script boundaries, then segmented
@@ -74,15 +74,15 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
   public static final String NAME = "icu";

   static final String RULEFILES = "rulefiles";
-  private final Map<Integer, String> tailored;
+  private final IntObjectHashMap<String> tailored;
   private ICUTokenizerConfig config;
   private final boolean cjkAsWords;
   private final boolean myanmarAsWords;

   /** Creates a new ICUTokenizerFactory */
   public ICUTokenizerFactory(Map<String, String> args) {
     super(args);
-    tailored = new HashMap<>();
+    tailored = new IntObjectHashMap<>();
     String rulefilesArg = get(args, RULEFILES);
     if (rulefilesArg != null) {
       List<String> scriptAndResourcePaths = splitFileNames(rulefilesArg);
@@ -113,9 +113,9 @@ public void inform(ResourceLoader loader) throws IOException {
     } else {
       final BreakIterator[] breakers =
           new BreakIterator[1 + UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)];
-      for (Map.Entry<Integer, String> entry : tailored.entrySet()) {
-        int code = entry.getKey();
-        String resourcePath = entry.getValue();
+      for (IntObjectHashMap.IntObjectCursor<String> entry : tailored) {
+        int code = entry.key;
+        String resourcePath = entry.value;
         breakers[code] = parseRules(resourcePath, loader);
       }
       config =
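Iteration changes shape as well: instead of allocating `Map.Entry` objects, the hppc map yields cursors with public `key`/`value` fields, as in the `inform()` hunk above. A small sketch under the same assumptions (the `printAll` helper is hypothetical):

```java
import org.apache.lucene.util.hppc.IntObjectHashMap;

class CursorIteration {
  static void printAll(IntObjectHashMap<String> tailored) {
    for (IntObjectHashMap.IntObjectCursor<String> entry : tailored) {
      int code = entry.key; // primitive, no unboxing
      System.out.println(code + " => " + entry.value);
    }
  }
}
```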
BiSegGraph.java

@@ -17,11 +17,9 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;

 import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import org.apache.lucene.analysis.cn.smart.Utility;
+import org.apache.lucene.util.hppc.IntObjectHashMap;

 /**
  * Graph representing possible token pairs (bigrams) at each start offset in the sentence.
@@ -32,7 +30,7 @@
  */
 class BiSegGraph {

-  private Map<Integer, ArrayList<SegTokenPair>> tokenPairListTable = new HashMap<>();
+  private IntObjectHashMap<ArrayList<SegTokenPair>> tokenPairListTable = new IntObjectHashMap<>();

   private List<SegToken> segTokenList;

@@ -122,7 +120,7 @@ private void generateBiSegGraph(SegGraph segGraph) {
    * @return true if a token pair exists
    */
   public boolean isToExist(int to) {
-    return tokenPairListTable.get(Integer.valueOf(to)) != null;
+    return tokenPairListTable.get(to) != null;
   }

   /**
@@ -220,9 +218,8 @@ public List<SegToken> getShortPath() {
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
-    Collection<ArrayList<SegTokenPair>> values = tokenPairListTable.values();
-    for (ArrayList<SegTokenPair> segList : values) {
-      for (SegTokenPair pair : segList) {
+    for (IntObjectHashMap.ObjectCursor<ArrayList<SegTokenPair>> segList : tokenPairListTable.values()) {
+      for (SegTokenPair pair : segList.value) {
         sb.append(pair).append("\n");
       }
     }
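One subtlety visible in the `toString()` hunk: `values()` on the hppc map is a container of `ObjectCursor` wrappers, so the loop variable carries the actual list in its `.value` field. A sketch of just that wrinkle (the `countPairs` helper is illustrative):

```java
import java.util.ArrayList;
import org.apache.lucene.util.hppc.IntObjectHashMap;

class ValuesIteration {
  static int countPairs(IntObjectHashMap<ArrayList<String>> table) {
    int total = 0;
    for (IntObjectHashMap.ObjectCursor<ArrayList<String>> cursor : table.values()) {
      total += cursor.value.size(); // cursor.value is the ArrayList itself
    }
    return total;
  }
}
```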
SegGraph.java

@@ -16,10 +16,10 @@
  */
 package org.apache.lucene.analysis.cn.smart.hhmm;

+import org.apache.lucene.util.hppc.IntObjectHashMap;
+
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;

 /**
  * Graph representing possible tokens at each start offset in the sentence.
@@ -31,7 +31,7 @@
 class SegGraph {

   /** Map of start offsets to ArrayList of tokens at that position */
-  private Map<Integer, ArrayList<SegToken>> tokenListTable = new HashMap<>();
+  private IntObjectHashMap<ArrayList<SegToken>> tokenListTable = new IntObjectHashMap<>();

   private int maxStart = -1;
SpatialDocMaker.java

@@ -17,7 +17,6 @@
 package org.apache.lucene.benchmark.byTask.feeds;

 import java.util.AbstractMap;
-import java.util.HashMap;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;
@@ -31,6 +30,7 @@
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
 import org.apache.lucene.spatial.serialized.SerializedDVStrategy;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 import org.locationtech.spatial4j.context.SpatialContext;
 import org.locationtech.spatial4j.context.SpatialContextFactory;
 import org.locationtech.spatial4j.shape.Point;
@@ -50,7 +50,7 @@ public class SpatialDocMaker extends DocMaker {
   public static final String SPATIAL_FIELD = "spatial";

   // cache spatialStrategy by round number
-  private static Map<Integer, SpatialStrategy> spatialStrategyCache = new HashMap<>();
+  private static IntObjectHashMap<SpatialStrategy> spatialStrategyCache = new IntObjectHashMap<>();

   private SpatialStrategy strategy;
   private ShapeConverter shapeConverter;
AutomatonToTokenStream.java

@@ -19,16 +19,15 @@

 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.hppc.IntIntHashMap;

 /** Converts an Automaton into a TokenStream. */
 public class AutomatonToTokenStream {
@@ -61,7 +60,7 @@ public static TokenStream toTokenStream(Automaton automaton) {
     }

     LinkedList<RemapNode> noIncomingEdges = new LinkedList<>();
-    Map<Integer, Integer> idToPos = new HashMap<>();
+    IntIntHashMap idToPos = new IntIntHashMap();
     noIncomingEdges.addLast(new RemapNode(0, 0));
     while (noIncomingEdges.isEmpty() == false) {
       RemapNode currState = noIncomingEdges.removeFirst();
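A semantic difference worth keeping in mind when reading this hunk: `Map<Integer, Integer>.get` returns null for a missing key, while `IntIntHashMap.get` returns the default value 0, which is indistinguishable from a real position 0. Where the distinction matters, `containsKey` restores it. A hedged sketch (the `positionOf` helper is not part of the PR):

```java
import org.apache.lucene.util.hppc.IntIntHashMap;

class MissingKeyCheck {
  static int positionOf(IntIntHashMap idToPos, int stateId) {
    if (idToPos.containsKey(stateId) == false) {
      throw new IllegalStateException("no position assigned to state " + stateId);
    }
    return idToPos.get(stateId); // would silently return 0 for an absent key
  }
}
```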
Lucene90NormsProducer.java

@@ -20,8 +20,6 @@
 import static org.apache.lucene.codecs.lucene90.Lucene90NormsFormat.VERSION_START;

 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.index.CorruptIndexException;
@@ -35,17 +33,18 @@
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.hppc.IntObjectHashMap;

 /** Reader for {@link Lucene90NormsFormat} */
 final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
   // metadata maps (just file pointers and minimal stuff)
-  private final Map<Integer, NormsEntry> norms = new HashMap<>();
+  private final IntObjectHashMap<NormsEntry> norms = new IntObjectHashMap<>();
   private final int maxDoc;
   private IndexInput data;
   private boolean merging;
-  private Map<Integer, IndexInput> disiInputs;
-  private Map<Integer, RandomAccessInput> disiJumpTables;
-  private Map<Integer, RandomAccessInput> dataInputs;
+  private IntObjectHashMap<IndexInput> disiInputs;
+  private IntObjectHashMap<RandomAccessInput> disiJumpTables;
+  private IntObjectHashMap<RandomAccessInput> dataInputs;

   Lucene90NormsProducer(
       SegmentReadState state,
@@ -121,9 +120,9 @@ public NormsProducer getMergeInstance() {
       throw new RuntimeException(e);
     }
     clone.data = data.clone();
-    clone.disiInputs = new HashMap<>();
-    clone.disiJumpTables = new HashMap<>();
-    clone.dataInputs = new HashMap<>();
+    clone.disiInputs = new IntObjectHashMap<>();
+    clone.disiJumpTables = new IntObjectHashMap<>();
+    clone.dataInputs = new IntObjectHashMap<>();
     clone.merging = true;
     return clone;
   }
Lucene90PointsReader.java

@@ -17,8 +17,6 @@
 package org.apache.lucene.codecs.lucene90;

 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PointsReader;
 import org.apache.lucene.index.CorruptIndexException;
@@ -31,12 +29,13 @@
 import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
+import org.apache.lucene.util.hppc.IntObjectHashMap;

 /** Reads point values previously written with {@link Lucene90PointsWriter} */
 public class Lucene90PointsReader extends PointsReader {
   final IndexInput indexIn, dataIn;
   final SegmentReadState readState;
-  final Map<Integer, PointValues> readers = new HashMap<>();
+  final IntObjectHashMap<PointValues> readers = new IntObjectHashMap<>();

   /** Sole constructor */
   public Lucene90PointsReader(SegmentReadState readState) throws IOException {
lucene/core/src/java/org/apache/lucene/index/FieldInfos.java (5 changes: 3 additions & 2 deletions)

@@ -34,6 +34,7 @@
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.hppc.IntObjectHashMap;

 /**
  * Collection of {@link FieldInfo}s (accessible by number or by name).
@@ -374,7 +375,7 @@ static final class FieldVectorProperties {

   static final class FieldNumbers {

-    private final Map<Integer, String> numberToName;
+    private final IntObjectHashMap<String> numberToName;
     private final Map<String, Integer> nameToNumber;
     private final Map<String, IndexOptions> indexOptions;
     // We use this to enforce that a given field never
@@ -401,7 +402,7 @@ static final class FieldNumbers {

     FieldNumbers(String softDeletesFieldName, String parentFieldName) {
       this.nameToNumber = new HashMap<>();
-      this.numberToName = new HashMap<>();
+      this.numberToName = new IntObjectHashMap<>();
       this.indexOptions = new HashMap<>();
       this.docValuesType = new HashMap<>();
       this.dimensions = new HashMap<>();
Operations.java (org.apache.lucene.util.automaton)

@@ -48,6 +48,7 @@
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.hppc.BitMixer;
+import org.apache.lucene.util.hppc.IntObjectHashMap;

 /**
  * Automata operations.
@@ -573,7 +574,7 @@ static final class PointTransitionSet {
     PointTransitions[] points = new PointTransitions[5];

     private static final int HASHMAP_CUTOVER = 30;
-    private final HashMap<Integer, PointTransitions> map = new HashMap<>();
+    private final IntObjectHashMap<PointTransitions> map = new IntObjectHashMap<>();
     private boolean useHash = false;

     private PointTransitions next(int point) {
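For context, the fields shown above (the `points` array, `HASHMAP_CUTOVER`, `useHash`) suggest a hybrid structure: small sets are linearly scanned in an array, and the map only takes over past the cutover, so the hppc swap matters on the large-automaton path. A simplified sketch of that hybrid pattern; all names, the migration logic, and the duplicate-key handling are illustrative, not Lucene's actual code:

```java
import org.apache.lucene.util.hppc.IntObjectHashMap;

class HybridIntMap<V> {
  private static final int CUTOVER = 30;
  private final int[] smallKeys = new int[CUTOVER];
  private final Object[] smallValues = new Object[CUTOVER];
  private int count;
  private final IntObjectHashMap<V> map = new IntObjectHashMap<>();
  private boolean useHash;

  @SuppressWarnings("unchecked")
  V get(int key) {
    if (useHash) {
      return map.get(key);
    }
    for (int i = 0; i < count; i++) { // linear scan is cache-friendly while tiny
      if (smallKeys[i] == key) {
        return (V) smallValues[i];
      }
    }
    return null;
  }

  @SuppressWarnings("unchecked")
  void put(int key, V value) {
    if (useHash == false && count == CUTOVER) {
      for (int i = 0; i < count; i++) { // one-time migration past the cutover
        map.put(smallKeys[i], (V) smallValues[i]);
      }
      useHash = true;
    }
    if (useHash) {
      map.put(key, value);
    } else { // assumes key is not already present, for brevity
      smallKeys[count] = key;
      smallValues[count] = value;
      count++;
    }
  }
}
```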
TermGroupSelector.java

@@ -19,21 +19,20 @@

 import java.io.IOException;
 import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.search.Scorable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.hppc.IntIntHashMap;

 /** A GroupSelector implementation that groups via SortedDocValues */
 public class TermGroupSelector extends GroupSelector<BytesRef> {

   private final String field;
   private final BytesRefHash values = new BytesRefHash();
-  private final Map<Integer, Integer> ordsToGroupIds = new HashMap<>();
+  private final IntIntHashMap ordsToGroupIds = new IntIntHashMap();

   private SortedDocValues docValues;
   private int groupId;