From 19ae89be1b6921f0ed5c962ffa56b021077194f3 Mon Sep 17 00:00:00 2001 From: Benjamin Trent Date: Wed, 2 Oct 2024 09:27:41 -0400 Subject: [PATCH] Fix bug where off-heap scorer would kick in even for float vectors (#13850) introduced in the major refactor #13779 Off-heap scoring is only present for byte[] vectors, and it isn't enough to verify that the vector provider also satisfies the HasIndexSlice interface. The vectors need to be byte vectors; otherwise, the slice iterations and scoring are completely nonsensical, causing HNSW graph building to run until the heat death of the universe. --- .../Lucene99MemorySegmentByteVectorScorer.java | 2 ++ .../Lucene99MemorySegmentByteVectorScorerSupplier.java | 2 ++ .../Lucene99MemorySegmentFlatVectorsScorer.java | 7 +++++-- 3 files changed, 9 insertions(+), 2 deletions(-) diff --git a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorer.java b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorer.java index dae2cc3502cd..b65f1e570921 100644 --- a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorer.java +++ b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorer.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.lang.foreign.MemorySegment; import java.util.Optional; +import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.FilterIndexInput; @@ -40,6 +41,7 @@ abstract sealed class Lucene99MemorySegmentByteVectorScorer */ public static Optional create( VectorSimilarityFunction type, IndexInput input, KnnVectorValues values, byte[] queryVector) { + assert values instanceof ByteVectorValues; input = FilterIndexInput.unwrapOnlyTest(input); if (!(input instanceof MemorySegmentAccessInput msInput)) { 
return Optional.empty(); diff --git a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorerSupplier.java b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorerSupplier.java index 9dd2b4620ace..02c71561122d 100644 --- a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorerSupplier.java +++ b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentByteVectorScorerSupplier.java @@ -19,6 +19,7 @@ import java.io.IOException; import java.lang.foreign.MemorySegment; import java.util.Optional; +import org.apache.lucene.index.ByteVectorValues; import org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.store.FilterIndexInput; @@ -42,6 +43,7 @@ public abstract sealed class Lucene99MemorySegmentByteVectorScorerSupplier */ static Optional create( VectorSimilarityFunction type, IndexInput input, KnnVectorValues values) { + assert values instanceof ByteVectorValues; input = FilterIndexInput.unwrapOnlyTest(input); if (!(input instanceof MemorySegmentAccessInput msInput)) { return Optional.empty(); diff --git a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentFlatVectorsScorer.java b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentFlatVectorsScorer.java index 63e79bccbdea..bd8cbb2c388a 100644 --- a/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentFlatVectorsScorer.java +++ b/lucene/core/src/java21/org/apache/lucene/internal/vectorization/Lucene99MemorySegmentFlatVectorsScorer.java @@ -20,6 +20,7 @@ import org.apache.lucene.codecs.hnsw.DefaultFlatVectorScorer; import org.apache.lucene.codecs.hnsw.FlatVectorsScorer; import org.apache.lucene.codecs.lucene95.HasIndexSlice; +import org.apache.lucene.index.ByteVectorValues; import 
org.apache.lucene.index.KnnVectorValues; import org.apache.lucene.index.VectorSimilarityFunction; import org.apache.lucene.util.hnsw.RandomVectorScorer; @@ -43,7 +44,8 @@ public RandomVectorScorerSupplier getRandomVectorScorerSupplier( // a quantized values here is a wrapping or delegation issue assert !(vectorValues instanceof QuantizedByteVectorValues); // currently only supports binary vectors - if (vectorValues instanceof HasIndexSlice byteVectorValues + if (vectorValues instanceof ByteVectorValues bvv + && bvv instanceof HasIndexSlice byteVectorValues && byteVectorValues.getSlice() != null) { var scorer = Lucene99MemorySegmentByteVectorScorerSupplier.create( @@ -70,7 +72,8 @@ public RandomVectorScorer getRandomVectorScorer( checkDimensions(queryVector.length, vectorValues.dimension()); // a quantized values here is a wrapping or delegation issue assert !(vectorValues instanceof QuantizedByteVectorValues); - if (vectorValues instanceof HasIndexSlice byteVectorValues + if (vectorValues instanceof ByteVectorValues bvv + && bvv instanceof HasIndexSlice byteVectorValues && byteVectorValues.getSlice() != null) { var scorer = Lucene99MemorySegmentByteVectorScorer.create(