Replace Map<Integer, Object> by primitive IntObjectHashMap. #13368

Merged · 3 commits · May 18, 2024
lucene/CHANGES.txt (2 additions, 0 deletions)

@@ -339,6 +339,8 @@ Optimizations
 
 * GITHUB#13327: Reduce memory usage of field maps in FieldInfos and BlockTree TermsReader. (Bruno Roustant, David Smiley)
 
+* GITHUB#13368: Replace Map<Integer, Object> by primitive IntObjectHashMap. (Bruno Roustant)
+
 Bug Fixes
 ---------------------
SuggestibleEntryCache.java (org.apache.lucene.analysis.hunspell)

@@ -16,12 +16,11 @@
  */
 package org.apache.lucene.analysis.hunspell;
 
-import java.util.HashMap;
-import java.util.Map;
 import java.util.function.Consumer;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.IntsRef;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * A cache allowing for CPU-cache-friendlier iteration over {@link WordStorage} entries that can be

@@ -35,9 +34,7 @@ class SuggestibleEntryCache {
 
   private final Section[] sections;
 
-  private SuggestibleEntryCache(Map<Integer, SectionBuilder> builders) {
-    int maxLength =
-        builders.isEmpty() ? 0 : builders.keySet().stream().max(Integer::compare).orElseThrow();
+  private SuggestibleEntryCache(IntObjectHashMap<SectionBuilder> builders, int maxLength) {
     sections = new Section[maxLength + 1];
     for (int i = 0; i < sections.length; i++) {
       SectionBuilder builder = builders.get(i);

@@ -48,22 +45,33 @@ private SuggestibleEntryCache(Map<Integer, SectionBuilder> builders) {
   static SuggestibleEntryCache buildCache(WordStorage storage) {
     var consumer =
         new Consumer<FlyweightEntry>() {
-          final Map<Integer, SectionBuilder> builders = new HashMap<>();
+          final IntObjectHashMap<SectionBuilder> builders = new IntObjectHashMap<>();
+          int maxLength;
 
           @Override
           public void accept(FlyweightEntry entry) {
             CharsRef root = entry.root();
             if (root.length > Short.MAX_VALUE) {
               throw new UnsupportedOperationException(
                   "Too long dictionary entry, please report this to dev@lucene.apache.org");
-            }
+            } else if (root.length > maxLength) {
+              maxLength = root.length;
+            }
 
-            builders.computeIfAbsent(root.length, __ -> new SectionBuilder()).add(entry);
+            SectionBuilder builder;
+            int index = builders.indexOf(root.length);
+            if (index < 0) {
+              builder = new SectionBuilder();
+              builders.indexInsert(index, root.length, builder);
+            } else {
+              builder = builders.indexGet(index);
+            }
+            builder.add(entry);
           }
         };
     storage.processSuggestibleWords(1, Integer.MAX_VALUE, consumer);
 
-    return new SuggestibleEntryCache(consumer.builders);
+    return new SuggestibleEntryCache(consumer.builders, consumer.maxLength);
   }
 
   private static class SectionBuilder {

Review comment from the author on the indexOf/indexInsert block above: "This is the only spot where the replacement brings some complexity. It does not seem too complex to me, so I kept it."
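For readers unfamiliar with the HPPC-style API used above: indexOf, indexInsert, and indexGet let the caller probe the hash table once and reuse the resulting slot, which is what replaces computeIfAbsent. A minimal sketch of the idiom, assuming Lucene's org.apache.lucene.util.hppc package is on the classpath; the GroupByLength class and sample words are illustrative, not from the patch:

```java
import java.util.ArrayList;
import java.util.List;
import org.apache.lucene.util.hppc.IntObjectHashMap;

public class GroupByLength {
  public static void main(String[] args) {
    IntObjectHashMap<List<String>> byLength = new IntObjectHashMap<>();
    for (String word : new String[] {"a", "bb", "cc", "ddd"}) {
      // indexOf probes once: a non-negative slot means the key exists,
      // a negative slot encodes where the key would be inserted.
      int index = byLength.indexOf(word.length());
      List<String> group;
      if (index < 0) {
        group = new ArrayList<>();
        byLength.indexInsert(index, word.length(), group); // reuse the probed slot
      } else {
        group = byLength.indexGet(index);
      }
      group.add(word);
    }
    System.out.println(byLength.size()); // 3 distinct lengths: 1, 2, 3
  }
}
```

Compared with computeIfAbsent, this avoids boxing the int key and allocating a capturing lambda, at the cost of a few extra lines; that is the trade-off the author's comment above refers to.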
ViterbiNBest.java

@@ -21,10 +21,10 @@
 import java.util.Arrays;
 import java.util.Collections;
 import java.util.EnumMap;
-import java.util.HashMap;
 import java.util.List;
 import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.hppc.IntIntHashMap;
 
 /** {@link Viterbi} subclass for n-best path calculation. */
 public abstract class ViterbiNBest<T extends Token, U extends MorphData>

@@ -137,14 +137,14 @@ protected final void fixupPendingList() {
     }
 
     // offset=>position map
-    HashMap<Integer, Integer> map = new HashMap<>();
+    IntIntHashMap map = new IntIntHashMap();
     for (Token t : pending) {
       map.put(t.getOffset(), 0);
       map.put(t.getOffset() + t.getLength(), 0);
     }
 
     // Get unique and sorted list of all edge position of tokens.
-    Integer[] offsets = map.keySet().toArray(new Integer[0]);
+    int[] offsets = map.keys().toArray();
     Arrays.sort(offsets);
 
     // setup all value of map. It specifies N-th position from begin.
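The second change in this hunk relies on keys() exposing the map's primitive key set, so the unique token edges come out as an int[] and can be sorted without boxing. A small standalone sketch of the pattern; the EdgePositions class and sample offsets are made up for illustration:

```java
import java.util.Arrays;
import org.apache.lucene.util.hppc.IntIntHashMap;

public class EdgePositions {
  public static void main(String[] args) {
    // Token edges as {offset, length}; duplicates collapse via the map keys.
    int[][] tokens = {{0, 2}, {2, 3}, {0, 5}};
    IntIntHashMap map = new IntIntHashMap();
    for (int[] t : tokens) {
      map.put(t[0], 0);        // start edge
      map.put(t[0] + t[1], 0); // end edge
    }
    int[] offsets = map.keys().toArray(); // primitive int[], no Integer[] boxing
    Arrays.sort(offsets);                 // plain primitive sort
    System.out.println(Arrays.toString(offsets)); // [0, 2, 5]
  }
}
```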
ICUTokenizerFactory.java

@@ -24,14 +24,14 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.nio.charset.StandardCharsets;
-import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import org.apache.lucene.analysis.TokenizerFactory;
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.ResourceLoader;
 import org.apache.lucene.util.ResourceLoaderAware;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Factory for {@link ICUTokenizer}. Words are broken across script boundaries, then segmented

@@ -74,15 +74,15 @@ public class ICUTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
   public static final String NAME = "icu";
 
   static final String RULEFILES = "rulefiles";
-  private final Map<Integer, String> tailored;
+  private final IntObjectHashMap<String> tailored;
   private ICUTokenizerConfig config;
   private final boolean cjkAsWords;
   private final boolean myanmarAsWords;
 
   /** Creates a new ICUTokenizerFactory */
   public ICUTokenizerFactory(Map<String, String> args) {
     super(args);
-    tailored = new HashMap<>();
+    tailored = new IntObjectHashMap<>();
     String rulefilesArg = get(args, RULEFILES);
     if (rulefilesArg != null) {
       List<String> scriptAndResourcePaths = splitFileNames(rulefilesArg);

@@ -113,9 +113,9 @@ public void inform(ResourceLoader loader) throws IOException {
     } else {
       final BreakIterator[] breakers =
           new BreakIterator[1 + UCharacter.getIntPropertyMaxValue(UProperty.SCRIPT)];
-      for (Map.Entry<Integer, String> entry : tailored.entrySet()) {
-        int code = entry.getKey();
-        String resourcePath = entry.getValue();
+      for (IntObjectHashMap.IntObjectCursor<String> entry : tailored) {
+        int code = entry.key;
+        String resourcePath = entry.value;
         breakers[code] = parseRules(resourcePath, loader);
       }
       config =
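Iteration changes shape as well: instead of Map.Entry objects, an IntObjectHashMap is traversed with cursor objects, as the hunk above shows. A brief sketch, assuming the same hppc package; the script code 17 and rule file name are placeholders:

```java
import org.apache.lucene.util.hppc.IntObjectHashMap;

public class CursorIteration {
  public static void main(String[] args) {
    IntObjectHashMap<String> tailored = new IntObjectHashMap<>();
    tailored.put(17, "MyScript.rbbi"); // placeholder script code -> rule file

    // The cursor is a reused holder, not a fresh Map.Entry per element:
    // read the primitive key and the value straight off its fields.
    for (IntObjectHashMap.IntObjectCursor<String> cursor : tailored) {
      System.out.println(cursor.key + " -> " + cursor.value);
    }
  }
}
```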
BiSegGraph.java (org.apache.lucene.analysis.cn.smart.hhmm)

@@ -17,11 +17,9 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;
 
 import java.util.ArrayList;
-import java.util.Collection;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
 import org.apache.lucene.analysis.cn.smart.Utility;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Graph representing possible token pairs (bigrams) at each start offset in the sentence.

@@ -32,7 +30,7 @@
  */
 class BiSegGraph {
 
-  private Map<Integer, ArrayList<SegTokenPair>> tokenPairListTable = new HashMap<>();
+  private IntObjectHashMap<ArrayList<SegTokenPair>> tokenPairListTable = new IntObjectHashMap<>();
 
   private List<SegToken> segTokenList;
 

@@ -122,7 +120,7 @@ private void generateBiSegGraph(SegGraph segGraph) {
    * @return true if a token pair exists
    */
   public boolean isToExist(int to) {
-    return tokenPairListTable.get(Integer.valueOf(to)) != null;
+    return tokenPairListTable.get(to) != null;
   }
 
   /**

@@ -220,9 +218,9 @@ public List<SegToken> getShortPath() {
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
-    Collection<ArrayList<SegTokenPair>> values = tokenPairListTable.values();
-    for (ArrayList<SegTokenPair> segList : values) {
-      for (SegTokenPair pair : segList) {
+    for (IntObjectHashMap.ObjectCursor<ArrayList<SegTokenPair>> segList :
+        tokenPairListTable.values()) {
+      for (SegTokenPair pair : segList.value) {
         sb.append(pair).append("\n");
       }
     }
SegGraph.java (org.apache.lucene.analysis.cn.smart.hhmm)

@@ -17,9 +17,8 @@
 package org.apache.lucene.analysis.cn.smart.hhmm;
 
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.List;
-import java.util.Map;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Graph representing possible tokens at each start offset in the sentence.

@@ -31,7 +30,7 @@
 class SegGraph {
 
   /** Map of start offsets to ArrayList of tokens at that position */
-  private Map<Integer, ArrayList<SegToken>> tokenListTable = new HashMap<>();
+  private IntObjectHashMap<ArrayList<SegToken>> tokenListTable = new IntObjectHashMap<>();
 
   private int maxStart = -1;
SpatialDocMaker.java (org.apache.lucene.benchmark.byTask.feeds)

@@ -17,7 +17,6 @@
 package org.apache.lucene.benchmark.byTask.feeds;
 
 import java.util.AbstractMap;
-import java.util.HashMap;
 import java.util.Map;
 import java.util.Random;
 import java.util.Set;

@@ -31,6 +30,7 @@
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTree;
 import org.apache.lucene.spatial.prefix.tree.SpatialPrefixTreeFactory;
 import org.apache.lucene.spatial.serialized.SerializedDVStrategy;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 import org.locationtech.spatial4j.context.SpatialContext;
 import org.locationtech.spatial4j.context.SpatialContextFactory;
 import org.locationtech.spatial4j.shape.Point;

@@ -50,7 +50,7 @@ public class SpatialDocMaker extends DocMaker {
   public static final String SPATIAL_FIELD = "spatial";
 
   // cache spatialStrategy by round number
-  private static Map<Integer, SpatialStrategy> spatialStrategyCache = new HashMap<>();
+  private static IntObjectHashMap<SpatialStrategy> spatialStrategyCache = new IntObjectHashMap<>();
 
   private SpatialStrategy strategy;
   private ShapeConverter shapeConverter;
AutomatonToTokenStream.java

@@ -19,16 +19,15 @@
 
 import java.io.IOException;
 import java.util.ArrayList;
-import java.util.HashMap;
 import java.util.LinkedList;
 import java.util.List;
-import java.util.Map;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionLengthAttribute;
 import org.apache.lucene.util.automaton.Automaton;
 import org.apache.lucene.util.automaton.Transition;
+import org.apache.lucene.util.hppc.IntIntHashMap;
 
 /** Converts an Automaton into a TokenStream. */
 public class AutomatonToTokenStream {

@@ -61,7 +60,7 @@ public static TokenStream toTokenStream(Automaton automaton) {
     }
 
     LinkedList<RemapNode> noIncomingEdges = new LinkedList<>();
-    Map<Integer, Integer> idToPos = new HashMap<>();
+    IntIntHashMap idToPos = new IntIntHashMap();
     noIncomingEdges.addLast(new RemapNode(0, 0));
     while (noIncomingEdges.isEmpty() == false) {
       RemapNode currState = noIncomingEdges.removeFirst();
Lucene90NormsProducer.java (org.apache.lucene.codecs.lucene90)

@@ -20,8 +20,6 @@
 import static org.apache.lucene.codecs.lucene90.Lucene90NormsFormat.VERSION_START;
 
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.NormsProducer;
 import org.apache.lucene.index.CorruptIndexException;

@@ -35,17 +33,18 @@
 import org.apache.lucene.store.IndexInput;
 import org.apache.lucene.store.RandomAccessInput;
 import org.apache.lucene.util.IOUtils;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /** Reader for {@link Lucene90NormsFormat} */
 final class Lucene90NormsProducer extends NormsProducer implements Cloneable {
   // metadata maps (just file pointers and minimal stuff)
-  private final Map<Integer, NormsEntry> norms = new HashMap<>();
+  private final IntObjectHashMap<NormsEntry> norms = new IntObjectHashMap<>();
   private final int maxDoc;
   private IndexInput data;
   private boolean merging;
-  private Map<Integer, IndexInput> disiInputs;
-  private Map<Integer, RandomAccessInput> disiJumpTables;
-  private Map<Integer, RandomAccessInput> dataInputs;
+  private IntObjectHashMap<IndexInput> disiInputs;
+  private IntObjectHashMap<RandomAccessInput> disiJumpTables;
+  private IntObjectHashMap<RandomAccessInput> dataInputs;
 
   Lucene90NormsProducer(
       SegmentReadState state,

@@ -121,9 +120,9 @@ public NormsProducer getMergeInstance() {
       throw new RuntimeException(e);
     }
     clone.data = data.clone();
-    clone.disiInputs = new HashMap<>();
-    clone.disiJumpTables = new HashMap<>();
-    clone.dataInputs = new HashMap<>();
+    clone.disiInputs = new IntObjectHashMap<>();
+    clone.disiJumpTables = new IntObjectHashMap<>();
+    clone.dataInputs = new IntObjectHashMap<>();
     clone.merging = true;
     return clone;
   }
Lucene90PointsReader.java (org.apache.lucene.codecs.lucene90)

@@ -17,8 +17,6 @@
 package org.apache.lucene.codecs.lucene90;
 
 import java.io.IOException;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.codecs.CodecUtil;
 import org.apache.lucene.codecs.PointsReader;
 import org.apache.lucene.index.CorruptIndexException;

@@ -31,12 +29,13 @@
 import org.apache.lucene.store.ReadAdvice;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.bkd.BKDReader;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /** Reads point values previously written with {@link Lucene90PointsWriter} */
 public class Lucene90PointsReader extends PointsReader {
   final IndexInput indexIn, dataIn;
   final SegmentReadState readState;
-  final Map<Integer, PointValues> readers = new HashMap<>();
+  final IntObjectHashMap<PointValues> readers = new IntObjectHashMap<>();
 
   /** Sole constructor */
   public Lucene90PointsReader(SegmentReadState readState) throws IOException {
lucene/core/src/java/org/apache/lucene/index/FieldInfos.java (3 additions, 2 deletions)

@@ -34,6 +34,7 @@
 import java.util.stream.Collectors;
 import java.util.stream.StreamSupport;
 import org.apache.lucene.util.CollectionUtil;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Collection of {@link FieldInfo}s (accessible by number or by name).

@@ -374,7 +375,7 @@ static final class FieldVectorProperties {

   static final class FieldNumbers {
 
-    private final Map<Integer, String> numberToName;
+    private final IntObjectHashMap<String> numberToName;
     private final Map<String, Integer> nameToNumber;
     private final Map<String, IndexOptions> indexOptions;
     // We use this to enforce that a given field never

@@ -401,7 +402,7 @@ static final class FieldNumbers {

     FieldNumbers(String softDeletesFieldName, String parentFieldName) {
       this.nameToNumber = new HashMap<>();
-      this.numberToName = new HashMap<>();
+      this.numberToName = new IntObjectHashMap<>();
       this.indexOptions = new HashMap<>();
       this.docValuesType = new HashMap<>();
       this.dimensions = new HashMap<>();
Operations.java (org.apache.lucene.util.automaton)

@@ -48,6 +48,7 @@
 import org.apache.lucene.util.IntsRefBuilder;
 import org.apache.lucene.util.RamUsageEstimator;
 import org.apache.lucene.util.hppc.BitMixer;
+import org.apache.lucene.util.hppc.IntObjectHashMap;
 
 /**
  * Automata operations.

@@ -573,7 +574,7 @@ static final class PointTransitionSet {
   PointTransitions[] points = new PointTransitions[5];
 
   private static final int HASHMAP_CUTOVER = 30;
-  private final HashMap<Integer, PointTransitions> map = new HashMap<>();
+  private final IntObjectHashMap<PointTransitions> map = new IntObjectHashMap<>();
   private boolean useHash = false;
 
   private PointTransitions next(int point) {
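The surrounding fields hint at why only the map type changes here: PointTransitionSet appears to use a flat array up to HASHMAP_CUTOVER entries and to switch to the hash map beyond that. A hedged sketch of that small-then-hash cutover pattern; SmallIntStringMap is an illustrative stand-in, not Lucene's actual implementation:

```java
import org.apache.lucene.util.hppc.IntObjectHashMap;

/** Sketch of the cutover idiom: flat arrays for few keys, hash map beyond. */
class SmallIntStringMap {
  private static final int CUTOVER = 30;
  private final int[] keys = new int[CUTOVER];
  private final String[] values = new String[CUTOVER];
  private int size;
  private final IntObjectHashMap<String> map = new IntObjectHashMap<>();
  private boolean useHash = false;

  String get(int key) {
    if (useHash) {
      return map.get(key); // O(1) expected once cut over
    }
    for (int i = 0; i < size; i++) { // cache-friendly linear scan while small
      if (keys[i] == key) {
        return values[i];
      }
    }
    return null;
  }

  void put(int key, String value) {
    if (!useHash) {
      for (int i = 0; i < size; i++) { // replace an existing key in place
        if (keys[i] == key) {
          values[i] = value;
          return;
        }
      }
      if (size < CUTOVER) { // still room in the flat arrays
        keys[size] = key;
        values[size] = value;
        size++;
        return;
      }
      for (int i = 0; i < size; i++) { // arrays full: migrate to the map
        map.put(keys[i], values[i]);
      }
      useHash = true;
    }
    map.put(key, value);
  }
}
```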
TermGroupSelector.java

@@ -19,21 +19,20 @@
 
 import java.io.IOException;
 import java.util.Collection;
-import java.util.HashMap;
-import java.util.Map;
 import org.apache.lucene.index.DocValues;
 import org.apache.lucene.index.LeafReaderContext;
 import org.apache.lucene.index.SortedDocValues;
 import org.apache.lucene.search.Scorable;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefHash;
+import org.apache.lucene.util.hppc.IntIntHashMap;
 
 /** A GroupSelector implementation that groups via SortedDocValues */
 public class TermGroupSelector extends GroupSelector<BytesRef> {
 
   private final String field;
   private final BytesRefHash values = new BytesRefHash();
-  private final Map<Integer, Integer> ordsToGroupIds = new HashMap<>();
+  private final IntIntHashMap ordsToGroupIds = new IntIntHashMap();
 
   private SortedDocValues docValues;
   private int groupId;
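One behavioral difference to keep in mind when replacing Map<Integer, Integer> with IntIntHashMap: get on a missing key returns 0 rather than null, so code that used null to mean "absent" must ask containsKey (or indexOf) instead. A short sketch of the distinction, assuming containsKey behaves as in upstream HPPC:

```java
import org.apache.lucene.util.hppc.IntIntHashMap;

public class DefaultValueDemo {
  public static void main(String[] args) {
    IntIntHashMap ordsToGroupIds = new IntIntHashMap();
    ordsToGroupIds.put(3, 0); // ord 3 maps to group 0

    // get() cannot distinguish "mapped to 0" from "absent": both return 0.
    System.out.println(ordsToGroupIds.get(3)); // 0 (present)
    System.out.println(ordsToGroupIds.get(7)); // 0 (absent!)

    // Use containsKey (or indexOf) when absence matters.
    System.out.println(ordsToGroupIds.containsKey(3)); // true
    System.out.println(ordsToGroupIds.containsKey(7)); // false
  }
}
```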