From 974fb09e20ff43f08733a9923dd5cee277081b02 Mon Sep 17 00:00:00 2001
From: Anh Dung Bui
Date: Sun, 21 Jul 2024 15:27:48 +0900
Subject: [PATCH 1/2] Optimize binary search call

---
 .../apache/lucene/search/AbstractVectorSimilarityQuery.java | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java b/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java
index 77a5ff6f24f0..6ac8f940dea0 100644
--- a/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java
@@ -235,9 +235,12 @@ public int nextDoc() {
 
             @Override
             public int advance(int target) {
+              assert index >= -1 : "index must >= -1 but got " + index;
               index =
                   Arrays.binarySearch(
                       scoreDocs,
+                      Math.min(index + 1, scoreDocs.length),
+                      scoreDocs.length,
                       new ScoreDoc(target, 0),
                       Comparator.comparingInt(scoreDoc -> scoreDoc.doc));
               if (index < 0) {

From c94d577fe53dec0a72b436b2da585930db445ee6 Mon Sep 17 00:00:00 2001
From: Anh Dung Bui
Date: Sun, 15 Dec 2024 10:45:22 +0900
Subject: [PATCH 2/2] Move exponential search to ArrayUtil

---
 .../search/AbstractVectorSimilarityQuery.java |  3 +-
 .../org/apache/lucene/util/ArrayUtil.java     | 40 ++++++++++++++
 .../apache/lucene/util/IntArrayDocIdSet.java  |  7 +--
 .../org/apache/lucene/util/TestArrayUtil.java | 54 +++++++++++++++++++
 4 files changed, 97 insertions(+), 7 deletions(-)

diff --git a/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java b/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java
index 6ac8f940dea0..d8de7dde3a06 100644
--- a/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java
+++ b/lucene/core/src/java/org/apache/lucene/search/AbstractVectorSimilarityQuery.java
@@ -23,6 +23,7 @@ import java.util.Objects;
 
 import org.apache.lucene.index.LeafReader;
 import org.apache.lucene.index.LeafReaderContext;
+import org.apache.lucene.util.ArrayUtil;
 import org.apache.lucene.util.BitSet;
 import org.apache.lucene.util.BitSetIterator;
 import org.apache.lucene.util.Bits;
@@ -237,7 +238,7 @@ public int nextDoc() {
             public int advance(int target) {
               assert index >= -1 : "index must >= -1 but got " + index;
               index =
-                  Arrays.binarySearch(
+                  ArrayUtil.exponentialSearch(
                       scoreDocs,
                       Math.min(index + 1, scoreDocs.length),
                       scoreDocs.length,
diff --git a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
index 722df2ff6879..bba2dfd46ad5 100644
--- a/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
+++ b/lucene/core/src/java/org/apache/lucene/util/ArrayUtil.java
@@ -809,4 +809,44 @@ public static int compareUnsigned4(byte[] a, int aOffset, byte[] b, int bOffset)
     return Integer.compareUnsigned(
         (int) BitUtil.VH_BE_INT.get(a, aOffset), (int) BitUtil.VH_BE_INT.get(b, bOffset));
   }
+
+  /**
+   * Run an exponential search for the target in a sorted range of an array
+   *
+   * @param arr the array, sorted in ascending order over the searched range
+   * @param fromIndex the start index of the search (inclusive)
+   * @param toIndex the end index of the search (exclusive)
+   * @param target the target to search for
+   * @return index of the search key, if it is contained in the array; otherwise, (-(insertion
+   *     point) - 1)
+   */
+  public static int exponentialSearch(int[] arr, int fromIndex, int toIndex, int target) {
+    int bound = 1;
+    while (fromIndex + bound < toIndex && arr[fromIndex + bound] < target) {
+      bound *= 2;
+    }
+    return Arrays.binarySearch(
+        arr, fromIndex + bound / 2, Math.min(fromIndex + bound + 1, toIndex), target);
+  }
+
+  /**
+   * Run an exponential search for the target in a sorted range of an array
+   *
+   * @param arr the array, sorted according to {@code comp} over the searched range
+   * @param fromIndex the start index of the search (inclusive)
+   * @param toIndex the end index of the search (exclusive)
+   * @param target the target to search for
+   * @param comp the comparator that defines the ordering of {@code arr}
+   * @return index of the search key, if it is contained in the array; otherwise, (-(insertion
+   *     point) - 1)
+   */
+  public static <T> int exponentialSearch(
+      T[] arr, int fromIndex, int toIndex, T target, Comparator<? super T> comp) {
+    int bound = 1;
+    while (fromIndex + bound < toIndex && comp.compare(arr[fromIndex + bound], target) < 0) {
+      bound *= 2;
+    }
+    return Arrays.binarySearch(
+        arr, fromIndex + bound / 2, Math.min(fromIndex + bound + 1, toIndex), target, comp);
+  }
 }
diff --git a/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java b/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java
index eb4b93f499ef..7cd25312a43f 100644
--- a/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java
+++ b/lucene/core/src/java/org/apache/lucene/util/IntArrayDocIdSet.java
@@ -96,12 +96,7 @@ public int nextDoc() throws IOException {
 
     @Override
     public int advance(int target) throws IOException {
-      int bound = 1;
-      // given that we use this for small arrays only, this is very unlikely to overflow
-      while (i + bound < length && docs[i + bound] < target) {
-        bound *= 2;
-      }
-      i = Arrays.binarySearch(docs, i + bound / 2, Math.min(i + bound + 1, length), target);
+      i = ArrayUtil.exponentialSearch(docs, i, length, target);
       if (i < 0) {
         i = -1 - i;
       }
diff --git a/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java b/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java
index 972fd0c3c848..ef8b75f2bab4 100644
--- a/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java
+++ b/lucene/core/src/test/org/apache/lucene/util/TestArrayUtil.java
@@ -514,4 +514,58 @@ public void testCompareUnsigned8() {
 
     assertEquals(0, ArrayUtil.compareUnsigned8(a, aOffset, b, bOffset));
   }
+
+  public void testExponentialSearchForIntArray() {
+    final Random rnd = random();
+    final int[] arr = new int[rnd.nextInt(2000) + 1];
+    int last = 0;
+    for (int i = 0; i < arr.length; i++) {
+      arr[i] = last;
+      last += rnd.nextInt(1, 10);
+    }
+
+    // case 1: random number, may not be in the array (+1 keeps the bound positive when arr = {0})
+    int target = random().nextInt(arr[arr.length - 1] + 1);
+    int idx = ArrayUtil.exponentialSearch(arr, 0, arr.length, target);
+    assertEquals(Arrays.binarySearch(arr, 0, arr.length, target), idx);
+
+    // case 2: search for a number in the array
+    assertExponentialSearch(arr, random().nextInt(arr.length));
+    assertExponentialSearch(arr, 0);
+    assertExponentialSearch(arr, arr.length - 1);
+  }
+
+  private static void assertExponentialSearch(int[] arr, int expectedIndex) {
+    int idx = ArrayUtil.exponentialSearch(arr, 0, arr.length, arr[expectedIndex]);
+    assertEquals(expectedIndex, idx);
+  }
+
+  public void testExponentialSearchForObjectArray() {
+    final Random rnd = random();
+    final Integer[] arr = new Integer[rnd.nextInt(2000) + 1];
+    int last = 0;
+    for (int i = 0; i < arr.length; i++) {
+      arr[i] = last;
+      last += rnd.nextInt(1, 10);
+    }
+
+    // case 1: random number, may not be in the array (+1 keeps the bound positive when arr = {0})
+    int target = random().nextInt(arr[arr.length - 1] + 1);
+    int idx =
+        ArrayUtil.exponentialSearch(
+            arr, 0, arr.length, target, Comparator.comparingInt(Integer::intValue));
+    assertEquals(Arrays.binarySearch(arr, 0, arr.length, target), idx);
+
+    // case 2: search for a number in the array
+    assertExponentialSearch(arr, random().nextInt(arr.length));
+    assertExponentialSearch(arr, 0);
+    assertExponentialSearch(arr, arr.length - 1);
+  }
+
+  private static void assertExponentialSearch(Integer[] arr, int expectedIndex) {
+    int idx =
+        ArrayUtil.exponentialSearch(
+            arr, 0, arr.length, arr[expectedIndex], Comparator.comparingInt(Integer::intValue));
+    assertEquals(expectedIndex, idx);
+  }
 }