Skip to content

Commit

Permalink
Merge remote-tracking branch 'upstream/main' into feature/more-extens…
Browse files Browse the repository at this point in the history
…ible-flat-vector-storage
  • Loading branch information
benwtrent committed Apr 16, 2024
2 parents 57847f5 + 3ba7ebb commit dbddaf4
Show file tree
Hide file tree
Showing 34 changed files with 729 additions and 431 deletions.
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -222,6 +222,8 @@ Other
* GITHUB#12753: Bump minimum required Java version to 21
(Chris Hegarty, Robert Muir, Uwe Schindler)

* GITHUB#13296: Convert the FieldEntry, a static nested class, into a record. (Sanjay Dutt)

======================== Lucene 9.11.0 =======================

API Changes
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ private FieldEntry readField(IndexInput input, FieldInfo info) throws IOExceptio
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, info.getVectorSimilarityFunction());
return FieldEntry.create(input, info.getVectorSimilarityFunction());
}

@Override
Expand Down Expand Up @@ -303,37 +303,44 @@ public void close() throws IOException {
IOUtils.close(vectorData, vectorIndex);
}

private static class FieldEntry {

final int dimension;
final VectorSimilarityFunction similarityFunction;

final long vectorDataOffset;
final long vectorDataLength;
final long indexDataOffset;
final long indexDataLength;
final int[] ordToDoc;
final long[] ordOffsets;

FieldEntry(DataInput input, VectorSimilarityFunction similarityFunction) throws IOException {
this.similarityFunction = similarityFunction;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
indexDataOffset = input.readVLong();
indexDataLength = input.readVLong();
dimension = input.readInt();
int size = input.readInt();
ordToDoc = new int[size];
private record FieldEntry(
VectorSimilarityFunction similarityFunction,
long vectorDataOffset,
long vectorDataLength,
long indexDataOffset,
long indexDataLength,
int dimension,
int[] ordToDoc,
long[] ordOffsets) {

/**
 * Reads a {@code FieldEntry} from the field's metadata.
 *
 * <p>Read order (matches the on-disk layout as consumed below): vectorDataOffset (vlong),
 * vectorDataLength (vlong), indexDataOffset (vlong), indexDataLength (vlong), dimension (int),
 * size (int), then {@code size} vints of ord-to-doc mappings, followed by {@code size}
 * delta-encoded vlongs of per-ordinal index offsets.
 *
 * @param input metadata input, positioned at the start of this field's entry
 * @param similarityFunction the similarity function declared for the field
 * @return an immutable {@code FieldEntry} populated from {@code input}
 * @throws IOException if reading from {@code input} fails
 */
static FieldEntry create(DataInput input, VectorSimilarityFunction similarityFunction)
    throws IOException {
  final var vectorDataOffset = input.readVLong();
  final var vectorDataLength = input.readVLong();
  final var indexDataOffset = input.readVLong();
  final var indexDataLength = input.readVLong();
  final var dimension = input.readInt();
  final var size = input.readInt();
  final var ordToDoc = new int[size];
  for (int i = 0; i < size; i++) {
    // one vint per ordinal: ordinal -> docId
    int doc = input.readVInt();
    ordToDoc[i] = doc;
  }
  final var ordOffsets = new long[size];
  long offset = 0;
  for (int i = 0; i < ordOffsets.length; i++) {
    // offsets are delta-encoded on disk; accumulate into absolute offsets
    offset += input.readVLong();
    ordOffsets[i] = offset;
  }
  return new FieldEntry(
      similarityFunction,
      vectorDataOffset,
      vectorDataLength,
      indexDataOffset,
      indexDataLength,
      dimension,
      ordToDoc,
      ordOffsets);
}

int size() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ private FieldEntry readField(IndexInput input, FieldInfo info) throws IOExceptio
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, info.getVectorSimilarityFunction());
return FieldEntry.create(input, info.getVectorSimilarityFunction());
}

@Override
Expand Down Expand Up @@ -286,32 +286,30 @@ public void close() throws IOException {
IOUtils.close(vectorData, vectorIndex);
}

private static class FieldEntry {

final VectorSimilarityFunction similarityFunction;
final long vectorDataOffset;
final long vectorDataLength;
final long vectorIndexOffset;
final long vectorIndexLength;
final int maxConn;
final int numLevels;
final int dimension;
private final int size;
final int[] ordToDoc;
private final IntUnaryOperator ordToDocOperator;
final int[][] nodesByLevel;
// for each level the start offsets in vectorIndex file from where to read neighbours
final long[] graphOffsetsByLevel;

FieldEntry(DataInput input, VectorSimilarityFunction similarityFunction) throws IOException {
this.similarityFunction = similarityFunction;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
vectorIndexOffset = input.readVLong();
vectorIndexLength = input.readVLong();
dimension = input.readInt();
size = input.readInt();

private record FieldEntry(
VectorSimilarityFunction similarityFunction,
long vectorDataOffset,
long vectorDataLength,
long vectorIndexOffset,
long vectorIndexLength,
int maxConn,
int numLevels,
int dimension,
int size,
int[] ordToDoc,
IntUnaryOperator ordToDocOperator,
int[][] nodesByLevel,
// for each level the start offsets in vectorIndex file from where to read neighbours
long[] graphOffsetsByLevel) {
static FieldEntry create(DataInput input, VectorSimilarityFunction similarityFunction)
throws IOException {
final var vectorDataOffset = input.readVLong();
final var vectorDataLength = input.readVLong();
final var vectorIndexOffset = input.readVLong();
final var vectorIndexLength = input.readVLong();
final var dimension = input.readInt();
final var size = input.readInt();
final int[] ordToDoc;
int denseSparseMarker = input.readByte();
if (denseSparseMarker == -1) {
ordToDoc = null; // each document has a vector value
Expand All @@ -328,12 +326,13 @@ private static class FieldEntry {
ordToDoc[i] = doc;
}
}
ordToDocOperator = ordToDoc == null ? IntUnaryOperator.identity() : (ord) -> ordToDoc[ord];
final IntUnaryOperator ordToDocOperator =
ordToDoc == null ? IntUnaryOperator.identity() : (ord) -> ordToDoc[ord];

// read nodes by level
maxConn = input.readInt();
numLevels = input.readInt();
nodesByLevel = new int[numLevels][];
final var maxConn = input.readInt();
final var numLevels = input.readInt();
final var nodesByLevel = new int[numLevels][];
for (int level = 0; level < numLevels; level++) {
int numNodesOnLevel = input.readInt();
if (level == 0) {
Expand All @@ -350,7 +349,7 @@ private static class FieldEntry {

// calculate for each level the start offsets in vectorIndex file from where to read
// neighbours
graphOffsetsByLevel = new long[numLevels];
final var graphOffsetsByLevel = new long[numLevels];
final long connectionsAndSizeBytes =
Math.multiplyExact(Math.addExact(1L, maxConn), Integer.BYTES);
for (int level = 0; level < numLevels; level++) {
Expand All @@ -364,10 +363,21 @@ private static class FieldEntry {
Math.multiplyExact(connectionsAndSizeBytes, numNodesOnPrevLevel));
}
}
}

int size() {
return size;
return new FieldEntry(
similarityFunction,
vectorDataOffset,
vectorDataLength,
vectorIndexOffset,
vectorIndexLength,
maxConn,
numLevels,
dimension,
size,
ordToDoc,
ordToDocOperator,
nodesByLevel,
graphOffsetsByLevel);
}

int ordToDoc(int ord) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -201,7 +201,7 @@ private FieldEntry readField(IndexInput input, FieldInfo info) throws IOExceptio
+ " != "
+ info.getVectorSimilarityFunction());
}
return new FieldEntry(input, info.getVectorSimilarityFunction());
return FieldEntry.create(input, info.getVectorSimilarityFunction());
}

@Override
Expand Down Expand Up @@ -257,52 +257,54 @@ public void close() throws IOException {
IOUtils.close(vectorData, vectorIndex);
}

static class FieldEntry {

final VectorSimilarityFunction similarityFunction;
final long vectorDataOffset;
final long vectorDataLength;
final long vectorIndexOffset;
final long vectorIndexLength;
final int M;
final int numLevels;
final int dimension;
final int size;
final int[][] nodesByLevel;
// for each level the start offsets in vectorIndex file from where to read neighbours
final long[] graphOffsetsByLevel;

// the following four variables used to read docIds encoded by IndexDISI
// special values of docsWithFieldOffset are -1 and -2
// -1 : dense
// -2 : empty
// other: sparse
final long docsWithFieldOffset;
final long docsWithFieldLength;
final short jumpTableEntryCount;
final byte denseRankPower;

// the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
// note that only the sparse case needs to store ordToDoc
final long addressesOffset;
final int blockShift;
final DirectMonotonicReader.Meta meta;
final long addressesLength;

FieldEntry(IndexInput input, VectorSimilarityFunction similarityFunction) throws IOException {
this.similarityFunction = similarityFunction;
vectorDataOffset = input.readVLong();
vectorDataLength = input.readVLong();
vectorIndexOffset = input.readVLong();
vectorIndexLength = input.readVLong();
dimension = input.readInt();
size = input.readInt();

docsWithFieldOffset = input.readLong();
docsWithFieldLength = input.readLong();
jumpTableEntryCount = input.readShort();
denseRankPower = input.readByte();

static record FieldEntry(
VectorSimilarityFunction similarityFunction,
long vectorDataOffset,
long vectorDataLength,
long vectorIndexOffset,
long vectorIndexLength,
int M,
int numLevels,
int dimension,
int size,
int[][] nodesByLevel,
// for each level the start offsets in vectorIndex file from where to read neighbours
long[] graphOffsetsByLevel,

// the following four variables used to read docIds encoded by IndexDISI
// special values of docsWithFieldOffset are -1 and -2
// -1 : dense
// -2 : empty
// other: sparse
long docsWithFieldOffset,
long docsWithFieldLength,
short jumpTableEntryCount,
byte denseRankPower,

// the following four variables used to read ordToDoc encoded by DirectMonotonicWriter
// note that only the sparse case needs to store ordToDoc
long addressesOffset,
int blockShift,
DirectMonotonicReader.Meta meta,
long addressesLength) {
static FieldEntry create(IndexInput input, VectorSimilarityFunction similarityFunction)
throws IOException {
final var vectorDataOffset = input.readVLong();
final var vectorDataLength = input.readVLong();
final var vectorIndexOffset = input.readVLong();
final var vectorIndexLength = input.readVLong();
final var dimension = input.readInt();
final var size = input.readInt();

final var docsWithFieldOffset = input.readLong();
final var docsWithFieldLength = input.readLong();
final var jumpTableEntryCount = input.readShort();
final var denseRankPower = input.readByte();

final long addressesOffset;
final int blockShift;
final DirectMonotonicReader.Meta meta;
final long addressesLength;
// dense or empty
if (docsWithFieldOffset == -1 || docsWithFieldOffset == -2) {
addressesOffset = 0;
Expand All @@ -318,9 +320,9 @@ static class FieldEntry {
}

// read nodes by level
M = input.readInt();
numLevels = input.readInt();
nodesByLevel = new int[numLevels][];
final var M = input.readInt();
final var numLevels = input.readInt();
final var nodesByLevel = new int[numLevels][];
for (int level = 0; level < numLevels; level++) {
int numNodesOnLevel = input.readInt();
if (level == 0) {
Expand All @@ -337,7 +339,7 @@ static class FieldEntry {

// calculate for each level the start offsets in vectorIndex file from where to read
// neighbours
graphOffsetsByLevel = new long[numLevels];
final var graphOffsetsByLevel = new long[numLevels];
final long connectionsAndSizeLevel0Bytes =
Math.multiplyExact(Math.addExact(1, Math.multiplyExact(M, 2L)), Integer.BYTES);
final long connectionsAndSizeBytes = Math.multiplyExact(Math.addExact(1L, M), Integer.BYTES);
Expand All @@ -354,10 +356,26 @@ static class FieldEntry {
Math.multiplyExact(connectionsAndSizeBytes, numNodesOnPrevLevel));
}
}
}

int size() {
return size;
return new FieldEntry(
similarityFunction,
vectorDataOffset,
vectorDataLength,
vectorIndexOffset,
vectorIndexLength,
M,
numLevels,
dimension,
size,
nodesByLevel,
graphOffsetsByLevel,
docsWithFieldOffset,
docsWithFieldLength,
jumpTableEntryCount,
denseRankPower,
addressesOffset,
blockShift,
meta,
addressesLength);
}
}

Expand Down
Loading

0 comments on commit dbddaf4

Please sign in to comment.