From 4faa3ebdb7422d1f90cbc265194e62cf769cb15a Mon Sep 17 00:00:00 2001 From: Andriy Redko Date: Fri, 2 Feb 2024 13:19:08 -0500 Subject: [PATCH] Add advance(int) for numeric values in order to allow point based optimization to kick in (#12089) (#12129) * Add advance(int) for numeric values in order to allow point based optimization to kick in Signed-off-by: Andriy Redko * Address code review comments Signed-off-by: Andriy Redko --------- Signed-off-by: Andriy Redko (cherry picked from commit 4471a8d49b3415a78a0d1429c63fc6cda4531235) --- CHANGELOG.md | 1 + .../opensearch/search/sort/FieldSortIT.java | 181 ++++++++++++++++++ .../fielddata/AbstractNumericDocValues.java | 3 + .../opensearch/index/fielddata/FieldData.java | 34 ++++ .../index/fielddata/NumericDoubleValues.java | 22 +++ .../SingletonSortedNumericDoubleValues.java | 4 + .../SortableLongBitsNumericDocValues.java | 5 + ...SortableLongBitsToNumericDoubleValues.java | 4 + ...leLongBitsToSortedNumericDoubleValues.java | 4 + .../fielddata/SortedNumericDoubleValues.java | 11 ++ .../UnsignedLongToNumericDoubleValues.java | 4 + ...signedLongToSortedNumericDoubleValues.java | 4 + .../plain/SortedNumericIndexFieldData.java | 20 ++ .../functionscore/DecayFunctionBuilder.java | 5 + .../org/opensearch/search/MultiValueMode.java | 10 + .../aggregations/support/MissingValues.java | 4 + .../aggregations/support/ValuesSource.java | 5 + 17 files changed, 321 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index d0b74dc536030..328a9e219034a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -160,6 +160,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), - Prevent setting remote_snapshot store type on index creation ([#11867](https://github.com/opensearch-project/OpenSearch/pull/11867)) - [BUG] Fix remote shards balancer when filtering throttled nodes ([#11724](https://github.com/opensearch-project/OpenSearch/pull/11724)) - [Bug] Check phase name before SearchRequestOperationsListener 
onPhaseStart ([#12094](https://github.com/opensearch-project/OpenSearch/pull/12094)) +- Add advance(int) for numeric values in order to allow point based optimization to kick in ([#12089](https://github.com/opensearch-project/OpenSearch/pull/12089)) ### Security diff --git a/server/src/internalClusterTest/java/org/opensearch/search/sort/FieldSortIT.java b/server/src/internalClusterTest/java/org/opensearch/search/sort/FieldSortIT.java index f77f5a682da25..6145c3b8d86ab 100644 --- a/server/src/internalClusterTest/java/org/opensearch/search/sort/FieldSortIT.java +++ b/server/src/internalClusterTest/java/org/opensearch/search/sort/FieldSortIT.java @@ -2389,4 +2389,185 @@ public void testLongSortOptimizationCorrectResults() throws InterruptedException } } + public void testSimpleSortsPoints() throws Exception { + final int docs = 100; + + Random random = random(); + assertAcked( + prepareCreate("test").setMapping( + XContentFactory.jsonBuilder() + .startObject() + .startObject("properties") + .startObject("str_value") + .field("type", "keyword") + .endObject() + .startObject("boolean_value") + .field("type", "boolean") + .endObject() + .startObject("byte_value") + .field("type", "byte") + .endObject() + .startObject("short_value") + .field("type", "short") + .endObject() + .startObject("integer_value") + .field("type", "integer") + .endObject() + .startObject("long_value") + .field("type", "long") + .endObject() + .startObject("unsigned_long_value") + .field("type", "unsigned_long") + .endObject() + .startObject("float_value") + .field("type", "float") + .endObject() + .startObject("half_float_value") + .field("type", "half_float") + .endObject() + .startObject("double_value") + .field("type", "double") + .endObject() + .endObject() + .endObject() + ) + ); + ensureGreen(); + BigInteger UNSIGNED_LONG_BASE = Numbers.MAX_UNSIGNED_LONG_VALUE.subtract(BigInteger.valueOf(10000 * docs)); + List builders = new ArrayList<>(); + for (int i = 0; i < docs / 2; i++) { + 
IndexRequestBuilder builder = client().prepareIndex("test") + .setId(Integer.toString(i)) + .setSource( + jsonBuilder().startObject() + .field("str_value", new String(new char[] { (char) (97 + i), (char) (97 + i) })) + .field("boolean_value", true) + .field("byte_value", i) + .field("short_value", i) + .field("integer_value", i) + .field("long_value", i) + .field("unsigned_long_value", UNSIGNED_LONG_BASE.add(BigInteger.valueOf(10000 * i))) + .field("float_value", 32 * i) + .field("half_float_value", 16 * i) + .field("double_value", 64 * i) + .endObject() + ); + builders.add(builder); + } + + // We keep half of the docs with numeric values and other half without + for (int i = docs / 2; i < docs; i++) { + IndexRequestBuilder builder = client().prepareIndex("test") + .setId(Integer.toString(i)) + .setSource( + jsonBuilder().startObject().field("str_value", new String(new char[] { (char) (97 + i), (char) (97 + i) })).endObject() + ); + builders.add(builder); + } + + int j = 0; + Collections.shuffle(builders, random); + for (IndexRequestBuilder builder : builders) { + builder.get(); + if ((++j % 25) == 0) { + refresh(); + } + + } + refresh(); + indexRandomForConcurrentSearch("test"); + + final int size = 2; + // HALF_FLOAT + SearchResponse searchResponse = client().prepareSearch() + .setQuery(matchAllQuery()) + .setSize(size) + .addSort("half_float_value", SortOrder.ASC) + .get(); + + assertHitCount(searchResponse, docs); + assertThat(searchResponse.getHits().getHits().length, equalTo(size)); + for (int i = 0; i < size; i++) { + assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(i))); + } + + assertThat(searchResponse.toString(), not(containsString("error"))); + searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("half_float_value", SortOrder.DESC).get(); + + assertHitCount(searchResponse, docs); + assertThat(searchResponse.getHits().getHits().length, equalTo(size)); + for (int i = 0; i < size; i++) { + 
assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(docs / 2 - 1 - i))); + } + + assertThat(searchResponse.toString(), not(containsString("error"))); + + // FLOAT + searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("float_value", SortOrder.ASC).get(); + + assertHitCount(searchResponse, docs); + assertThat(searchResponse.getHits().getHits().length, equalTo(size)); + for (int i = 0; i < size; i++) { + assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(i))); + } + + assertThat(searchResponse.toString(), not(containsString("error"))); + searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("float_value", SortOrder.DESC).get(); + + assertHitCount(searchResponse, docs); + assertThat(searchResponse.getHits().getHits().length, equalTo(size)); + for (int i = 0; i < size; i++) { + assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(docs / 2 - 1 - i))); + } + + assertThat(searchResponse.toString(), not(containsString("error"))); + + // DOUBLE + searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("double_value", SortOrder.ASC).get(); + + assertHitCount(searchResponse, docs); + assertThat(searchResponse.getHits().getHits().length, equalTo(size)); + for (int i = 0; i < size; i++) { + assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(i))); + } + + assertThat(searchResponse.toString(), not(containsString("error"))); + searchResponse = client().prepareSearch().setQuery(matchAllQuery()).setSize(size).addSort("double_value", SortOrder.DESC).get(); + + assertHitCount(searchResponse, docs); + assertThat(searchResponse.getHits().getHits().length, equalTo(size)); + for (int i = 0; i < size; i++) { + assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(docs / 2 - 1 - i))); + } + + assertThat(searchResponse.toString(), 
not(containsString("error"))); + + // UNSIGNED_LONG + searchResponse = client().prepareSearch() + .setQuery(matchAllQuery()) + .setSize(size) + .addSort("unsigned_long_value", SortOrder.ASC) + .get(); + + assertHitCount(searchResponse, docs); + assertThat(searchResponse.getHits().getHits().length, equalTo(size)); + for (int i = 0; i < size; i++) { + assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(i))); + } + + assertThat(searchResponse.toString(), not(containsString("error"))); + searchResponse = client().prepareSearch() + .setQuery(matchAllQuery()) + .setSize(size) + .addSort("unsigned_long_value", SortOrder.DESC) + .get(); + + assertHitCount(searchResponse, docs); + assertThat(searchResponse.getHits().getHits().length, equalTo(size)); + for (int i = 0; i < size; i++) { + assertThat(searchResponse.getHits().getAt(i).getId(), equalTo(Integer.toString(docs / 2 - 1 - i))); + } + + assertThat(searchResponse.toString(), not(containsString("error"))); + } } diff --git a/server/src/main/java/org/opensearch/index/fielddata/AbstractNumericDocValues.java b/server/src/main/java/org/opensearch/index/fielddata/AbstractNumericDocValues.java index a2a70e280187a..3a2504ce92158 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/AbstractNumericDocValues.java +++ b/server/src/main/java/org/opensearch/index/fielddata/AbstractNumericDocValues.java @@ -43,6 +43,9 @@ * aggregations, which only use {@link #advanceExact(int)} and * {@link #longValue()}. * + * In case when optimizations based on point values are used, the {@link #advance(int)} + * and, optionally, {@link #cost()} have to be implemented as well. 
+ * * @opensearch.internal */ public abstract class AbstractNumericDocValues extends NumericDocValues { diff --git a/server/src/main/java/org/opensearch/index/fielddata/FieldData.java b/server/src/main/java/org/opensearch/index/fielddata/FieldData.java index e09de53dc05f7..6db6bbccacae5 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/FieldData.java +++ b/server/src/main/java/org/opensearch/index/fielddata/FieldData.java @@ -37,6 +37,7 @@ import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.search.DocIdSetIterator; import org.apache.lucene.util.BytesRef; import org.opensearch.common.Numbers; import org.opensearch.common.geo.GeoPoint; @@ -76,6 +77,10 @@ public double doubleValue() throws IOException { throw new UnsupportedOperationException(); } + @Override + public int advance(int target) throws IOException { + return DocIdSetIterator.NO_MORE_DOCS; + } }; } @@ -561,6 +566,10 @@ public boolean advanceExact(int doc) throws IOException { return values.advanceExact(doc); } + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } } /** @@ -591,6 +600,10 @@ public int docValueCount() { return values.docValueCount(); } + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } } /** @@ -622,6 +635,12 @@ public long longValue() throws IOException { public int docID() { return docID; } + + @Override + public int advance(int target) throws IOException { + docID = values.advance(target); + return docID; + } } /** @@ -683,6 +702,11 @@ public boolean advanceExact(int target) throws IOException { public long longValue() throws IOException { return value; } + + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } }; } @@ -715,6 +739,11 @@ public boolean advanceExact(int target) throws IOException { 
public long longValue() throws IOException { return value.longValue(); } + + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } }; } @@ -742,6 +771,11 @@ public boolean advanceExact(int target) throws IOException { public double doubleValue() throws IOException { return value; } + + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } }; } } diff --git a/server/src/main/java/org/opensearch/index/fielddata/NumericDoubleValues.java b/server/src/main/java/org/opensearch/index/fielddata/NumericDoubleValues.java index b0f3400acfb3d..f69cfacaf35d4 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/NumericDoubleValues.java +++ b/server/src/main/java/org/opensearch/index/fielddata/NumericDoubleValues.java @@ -71,6 +71,11 @@ public long longValue() throws IOException { public int docID() { return docID; } + + @Override + public int advance(int target) throws IOException { + return NumericDoubleValues.this.advance(target); + } }; } @@ -95,6 +100,23 @@ public long longValue() throws IOException { public int docID() { return docID; } + + @Override + public int advance(int target) throws IOException { + return NumericDoubleValues.this.advance(target); + } }; } + + /** + * Advances to the first document beyond the current one whose document number is greater than or equal to + * target, and returns the document number itself. Exhausts the iterator and returns {@link + * org.apache.lucene.search.DocIdSetIterator#NO_MORE_DOCS} if target is greater than the highest document number in the set. + * + * This method is used by {@link org.apache.lucene.search.comparators.NumericComparator.NumericLeafComparator} when point values optimization kicks + * in and is implemented by most numeric types; the default implementation throws {@link UnsupportedOperationException}. 
+ */ + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(); + } } diff --git a/server/src/main/java/org/opensearch/index/fielddata/SingletonSortedNumericDoubleValues.java b/server/src/main/java/org/opensearch/index/fielddata/SingletonSortedNumericDoubleValues.java index 4ee494ffb30aa..816445bb319f1 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/SingletonSortedNumericDoubleValues.java +++ b/server/src/main/java/org/opensearch/index/fielddata/SingletonSortedNumericDoubleValues.java @@ -69,4 +69,8 @@ public double nextValue() throws IOException { return in.doubleValue(); } + @Override + public int advance(int target) throws IOException { + return in.advance(target); + } } diff --git a/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsNumericDocValues.java b/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsNumericDocValues.java index 39aca38c331ea..e2739e462dea5 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsNumericDocValues.java +++ b/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsNumericDocValues.java @@ -74,4 +74,9 @@ public NumericDoubleValues getDoubleValues() { return values; } + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } + } diff --git a/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsToNumericDoubleValues.java b/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsToNumericDoubleValues.java index 150e114d342de..98a44c246f654 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsToNumericDoubleValues.java +++ b/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsToNumericDoubleValues.java @@ -67,4 +67,8 @@ public NumericDocValues getLongValues() { return values; } + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } } diff --git 
a/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsToSortedNumericDoubleValues.java b/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsToSortedNumericDoubleValues.java index 1bae845c9b0d2..279a78ac51adf 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsToSortedNumericDoubleValues.java +++ b/server/src/main/java/org/opensearch/index/fielddata/SortableLongBitsToSortedNumericDoubleValues.java @@ -72,4 +72,8 @@ public SortedNumericDocValues getLongValues() { return values; } + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } } diff --git a/server/src/main/java/org/opensearch/index/fielddata/SortedNumericDoubleValues.java b/server/src/main/java/org/opensearch/index/fielddata/SortedNumericDoubleValues.java index dce1aff9cc94f..be9064751b5f0 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/SortedNumericDoubleValues.java +++ b/server/src/main/java/org/opensearch/index/fielddata/SortedNumericDoubleValues.java @@ -70,4 +70,15 @@ protected SortedNumericDoubleValues() {} */ public abstract int docValueCount(); + /** + * Advances to the first document beyond the current one whose document number is greater than or equal to + * target, and returns the document number itself. Exhausts the iterator and returns {@link + * org.apache.lucene.search.DocIdSetIterator#NO_MORE_DOCS} if target is greater than the highest document number in the set. + * + * This method is used by {@link org.apache.lucene.search.comparators.NumericComparator.NumericLeafComparator} when point values optimization kicks + * in and is implemented by most numeric types; the default implementation throws {@link UnsupportedOperationException}. 
+ */ + public int advance(int target) throws IOException { + throw new UnsupportedOperationException(); + } } diff --git a/server/src/main/java/org/opensearch/index/fielddata/UnsignedLongToNumericDoubleValues.java b/server/src/main/java/org/opensearch/index/fielddata/UnsignedLongToNumericDoubleValues.java index 8d17146760d9e..d9e9dd6a293fd 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/UnsignedLongToNumericDoubleValues.java +++ b/server/src/main/java/org/opensearch/index/fielddata/UnsignedLongToNumericDoubleValues.java @@ -42,4 +42,8 @@ public NumericDocValues getLongValues() { return values; } + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } } diff --git a/server/src/main/java/org/opensearch/index/fielddata/UnsignedLongToSortedNumericDoubleValues.java b/server/src/main/java/org/opensearch/index/fielddata/UnsignedLongToSortedNumericDoubleValues.java index 90b49e19a8954..63c7e6162cc55 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/UnsignedLongToSortedNumericDoubleValues.java +++ b/server/src/main/java/org/opensearch/index/fielddata/UnsignedLongToSortedNumericDoubleValues.java @@ -47,4 +47,8 @@ public SortedNumericDocValues getLongValues() { return values; } + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } } diff --git a/server/src/main/java/org/opensearch/index/fielddata/plain/SortedNumericIndexFieldData.java b/server/src/main/java/org/opensearch/index/fielddata/plain/SortedNumericIndexFieldData.java index b70752df9e826..0019a41e67c02 100644 --- a/server/src/main/java/org/opensearch/index/fielddata/plain/SortedNumericIndexFieldData.java +++ b/server/src/main/java/org/opensearch/index/fielddata/plain/SortedNumericIndexFieldData.java @@ -336,6 +336,11 @@ public double doubleValue() throws IOException { public boolean advanceExact(int doc) throws IOException { return in.advanceExact(doc); } + + @Override + public int 
advance(int target) throws IOException { + return in.advance(target); + } } /** @@ -364,6 +369,11 @@ public double nextValue() throws IOException { public int docValueCount() { return in.docValueCount(); } + + @Override + public int advance(int target) throws IOException { + return in.advance(target); + } } /** @@ -434,6 +444,11 @@ public double doubleValue() throws IOException { public boolean advanceExact(int doc) throws IOException { return in.advanceExact(doc); } + + @Override + public int advance(int target) throws IOException { + return in.advance(target); + } } /** @@ -462,6 +477,11 @@ public double nextValue() throws IOException { public int docValueCount() { return in.docValueCount(); } + + @Override + public int advance(int target) throws IOException { + return in.advance(target); + } } /** diff --git a/server/src/main/java/org/opensearch/index/query/functionscore/DecayFunctionBuilder.java b/server/src/main/java/org/opensearch/index/query/functionscore/DecayFunctionBuilder.java index cfc44d4434d3b..1c693f9761240 100644 --- a/server/src/main/java/org/opensearch/index/query/functionscore/DecayFunctionBuilder.java +++ b/server/src/main/java/org/opensearch/index/query/functionscore/DecayFunctionBuilder.java @@ -560,6 +560,11 @@ public boolean needsScores() { protected NumericDoubleValues distance(LeafReaderContext context) { final SortedNumericDoubleValues doubleValues = fieldData.load(context).getDoubleValues(); return FieldData.replaceMissing(mode.select(new SortingNumericDoubleValues() { + @Override + public int advance(int target) throws IOException { + return doubleValues.advance(target); + } + @Override public boolean advanceExact(int docId) throws IOException { if (doubleValues.advanceExact(docId)) { diff --git a/server/src/main/java/org/opensearch/search/MultiValueMode.java b/server/src/main/java/org/opensearch/search/MultiValueMode.java index ca088203733c6..a99da674836f2 100644 --- a/server/src/main/java/org/opensearch/search/MultiValueMode.java +++ 
b/server/src/main/java/org/opensearch/search/MultiValueMode.java @@ -685,6 +685,11 @@ public boolean advanceExact(int target) throws IOException { public double doubleValue() throws IOException { return this.value; } + + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } }; } } @@ -745,6 +750,11 @@ public boolean advanceExact(int parentDoc) throws IOException { public double doubleValue() throws IOException { return lastEmittedValue; } + + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } }; } diff --git a/server/src/main/java/org/opensearch/search/aggregations/support/MissingValues.java b/server/src/main/java/org/opensearch/search/aggregations/support/MissingValues.java index da1d9961ed81b..d21737a8366b2 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/support/MissingValues.java +++ b/server/src/main/java/org/opensearch/search/aggregations/support/MissingValues.java @@ -227,6 +227,10 @@ public String toString() { return "anon SortedNumericDoubleValues of [" + super.toString() + "]"; } + @Override + public int advance(int target) throws IOException { + return values.advance(target); + } }; } diff --git a/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java b/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java index 1a76183ac1a2d..3ce1f0447dfcc 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java +++ b/server/src/main/java/org/opensearch/search/aggregations/support/ValuesSource.java @@ -576,6 +576,11 @@ public boolean advanceExact(int target) throws IOException { } return false; } + + @Override + public int advance(int target) throws IOException { + return doubleValues.advance(target); + } } }