Skip to content

Commit

Permalink
Fix bug where off-heap scorer would kick in even for float vectors (#…
Browse files Browse the repository at this point in the history
…13850)

This bug was introduced in the major refactor #13779.

Off-heap scoring is only present for byte[] vectors, and it isn't enough to verify that the vector provider also satisfies the HasIndexSlice interface. The vectors need to be byte vectors; otherwise, the slice iteration and scoring are completely nonsensical, causing HNSW graph building to run until the heat death of the universe.
  • Loading branch information
benwtrent committed Oct 2, 2024
1 parent 92444bf commit 19ae89b
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 2 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.IOException;
import java.lang.foreign.MemorySegment;
import java.util.Optional;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.FilterIndexInput;
Expand All @@ -40,6 +41,7 @@ abstract sealed class Lucene99MemorySegmentByteVectorScorer
*/
public static Optional<Lucene99MemorySegmentByteVectorScorer> create(
VectorSimilarityFunction type, IndexInput input, KnnVectorValues values, byte[] queryVector) {
assert values instanceof ByteVectorValues;
input = FilterIndexInput.unwrapOnlyTest(input);
if (!(input instanceof MemorySegmentAccessInput msInput)) {
return Optional.empty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import java.io.IOException;
import java.lang.foreign.MemorySegment;
import java.util.Optional;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.FilterIndexInput;
Expand All @@ -42,6 +43,7 @@ public abstract sealed class Lucene99MemorySegmentByteVectorScorerSupplier
*/
static Optional<RandomVectorScorerSupplier> create(
VectorSimilarityFunction type, IndexInput input, KnnVectorValues values) {
assert values instanceof ByteVectorValues;
input = FilterIndexInput.unwrapOnlyTest(input);
if (!(input instanceof MemorySegmentAccessInput msInput)) {
return Optional.empty();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer;
import org.apache.lucene.codecs.hnsw.FlatVectorsScorer;
import org.apache.lucene.codecs.lucene95.HasIndexSlice;
import org.apache.lucene.index.ByteVectorValues;
import org.apache.lucene.index.KnnVectorValues;
import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.util.hnsw.RandomVectorScorer;
Expand All @@ -43,7 +44,8 @@ public RandomVectorScorerSupplier getRandomVectorScorerSupplier(
// a quantized values here is a wrapping or delegation issue
assert !(vectorValues instanceof QuantizedByteVectorValues);
// currently only supports binary vectors
if (vectorValues instanceof HasIndexSlice byteVectorValues
if (vectorValues instanceof ByteVectorValues bvv
&& bvv instanceof HasIndexSlice byteVectorValues
&& byteVectorValues.getSlice() != null) {
var scorer =
Lucene99MemorySegmentByteVectorScorerSupplier.create(
Expand All @@ -70,7 +72,8 @@ public RandomVectorScorer getRandomVectorScorer(
checkDimensions(queryVector.length, vectorValues.dimension());
// a quantized values here is a wrapping or delegation issue
assert !(vectorValues instanceof QuantizedByteVectorValues);
if (vectorValues instanceof HasIndexSlice byteVectorValues
if (vectorValues instanceof ByteVectorValues bvv
&& bvv instanceof HasIndexSlice byteVectorValues
&& byteVectorValues.getSlice() != null) {
var scorer =
Lucene99MemorySegmentByteVectorScorer.create(
Expand Down

0 comments on commit 19ae89b

Please sign in to comment.