diff --git a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml index 0897e0bdd894b..f585063d7a395 100644 --- a/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml +++ b/rest-api-spec/src/main/resources/rest-api-spec/test/search.aggregation/330_auto_date_histogram.yml @@ -158,3 +158,111 @@ setup: - match: { profile.shards.0.aggregations.0.debug.unoptimized_segments: 0 } - match: { profile.shards.0.aggregations.0.debug.leaf_visited: 1 } - match: { profile.shards.0.aggregations.0.debug.inner_visited: 0 } + +--- +"Complex aggregation with range, auto_date_histogram, and metric aggregations": + - do: + indices.create: + index: tmin_metrics + body: + mappings: + properties: + "@timestamp": + type: date + metrics: + properties: + size: + type: float + tmin: + type: float + + - do: + bulk: + refresh: true + index: tmin_metrics + body: + - '{"index":{}}' + - '{"@timestamp": "2023-01-01T00:00:00Z", "metrics": {"size": -15, "tmin": -20}}' + - '{"index":{}}' + - '{"@timestamp": "2023-01-02T00:00:00Z", "metrics": {"size": 5, "tmin": 0}}' + - '{"index":{}}' + - '{"@timestamp": "2023-01-03T00:00:00Z", "metrics": {"size": 26, "tmin": 10}}' + - '{"index":{}}' + - '{"@timestamp": "2023-01-04T00:00:00Z", "metrics": {"size": 50, "tmin": 15}}' + - '{"index":{}}' + - '{"@timestamp": "2023-01-05T00:00:00Z", "metrics": {"size": 77, "tmin": 20}}' + - '{"index":{}}' + - '{"@timestamp": "2023-02-15T00:00:00Z", "metrics": {"size": 88, "tmin": 33}}' + - '{"index":{}}' + - '{"@timestamp": "2023-01-06T00:00:00Z", "metrics": {"size": 123, "tmin": 25}}' + + - do: + search: + index: tmin_metrics + body: + size: 0 + aggs: + tmax: + range: + field: metrics.size + ranges: + - to: 10 + - from: 10 + to: 100 + - from: 100 + + aggs: + date: + auto_date_histogram: + field: "@timestamp" + buckets: 20 + aggs: + tmin: + min: + field: metrics.tmin + tavg: + avg: + field: metrics.size + tmax: + max: + field: metrics.size + + - match: { hits.total.value: 7 } + - length: { aggregations.tmax.buckets: 3 } + + - match: { aggregations.tmax.buckets.0.key: "*-10.0" } + - match: { aggregations.tmax.buckets.0.doc_count: 2 } + - length: { aggregations.tmax.buckets.0.date.buckets: 9 } + + - match: { aggregations.tmax.buckets.0.date.buckets.0.doc_count: 1 } + - match: { aggregations.tmax.buckets.0.date.buckets.0.tmin.value: -20.0 } + - match: { aggregations.tmax.buckets.0.date.buckets.0.tavg.value: -15.0 } + - match: { aggregations.tmax.buckets.0.date.buckets.0.tmax.value: -15.0 } + + - match: { aggregations.tmax.buckets.0.date.buckets.8.doc_count: 1 } + - match: { aggregations.tmax.buckets.0.date.buckets.8.tmin.value: 0.0 } + - match: { aggregations.tmax.buckets.0.date.buckets.8.tavg.value: 5.0 } + - match: { aggregations.tmax.buckets.0.date.buckets.8.tmax.value: 5.0 } + + - match: { aggregations.tmax.buckets.1.key: "10.0-100.0" } + - match: { aggregations.tmax.buckets.1.doc_count: 4 } + - length: { aggregations.tmax.buckets.1.date.buckets: 7 } + + - match: { aggregations.tmax.buckets.1.date.buckets.0.doc_count: 3 } + - match: { aggregations.tmax.buckets.1.date.buckets.0.tmin.value: 10.0 } + - match: { aggregations.tmax.buckets.1.date.buckets.0.tavg.value: 51.0 } + - match: { aggregations.tmax.buckets.1.date.buckets.0.tmax.value: 77.0 } + + - match: { aggregations.tmax.buckets.1.date.buckets.6.doc_count: 1 } + - match: { aggregations.tmax.buckets.1.date.buckets.6.tmin.value: 33.0 } + - match: { aggregations.tmax.buckets.1.date.buckets.6.tavg.value: 88.0 } + - match: { aggregations.tmax.buckets.1.date.buckets.6.tmax.value: 88.0 } + + - match: { aggregations.tmax.buckets.2.key: "100.0-*" } + - match: { aggregations.tmax.buckets.2.doc_count: 1 } + - length: { aggregations.tmax.buckets.2.date.buckets: 1 } + + - match: { aggregations.tmax.buckets.2.date.buckets.0.doc_count: 1 } + - match: { aggregations.tmax.buckets.2.date.buckets.0.tmin.value: 25.0 } + - match: { aggregations.tmax.buckets.2.date.buckets.0.tavg.value: 123.0 } + - match: { aggregations.tmax.buckets.2.date.buckets.0.tmax.value: 123.0 } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java index cfe716eb57ca8..bb003568b8aa3 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/composite/CompositeAggregator.java @@ -32,6 +32,7 @@ package org.opensearch.search.aggregations.bucket.composite; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.DocValues; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.NumericDocValues; @@ -75,6 +76,7 @@ import org.opensearch.search.aggregations.bucket.BucketsAggregator; import org.opensearch.search.aggregations.bucket.filterrewrite.CompositeAggregatorBridge; import org.opensearch.search.aggregations.bucket.filterrewrite.FilterRewriteOptimizationContext; +import org.opensearch.search.aggregations.bucket.filterrewrite.PackedValueRanges; import org.opensearch.search.aggregations.bucket.missing.MissingOrder; import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.opensearch.search.internal.SearchContext; @@ -89,7 +91,6 @@ import java.util.List; import java.util.Map; import java.util.function.BiConsumer; -import java.util.function.Function; import java.util.function.LongUnaryOperator; import java.util.stream.Collectors; @@ -213,16 +214,25 @@ protected long[] processAfterKey(long[] bounds, long interval) { } @Override - protected int getSize() { + protected int rangeMax() { return size; } @Override - protected Function bucketOrdProducer() { - return (key) -> bucketOrds.add(0, getRoundingPrepared().round((long) key)); + protected long getOrd(int rangeIdx, PackedValueRanges ranges) { + assert(ranges != null); + long rangeStart = LongPoint.decodeDimension(ranges.getLower(rangeIdx), 0); + rangeStart = this.getFieldType().convertNanosToMillis(rangeStart); + long ord = bucketOrds.add(0, getRoundingPrepared().round(rangeStart)); + + if (ord < 0) { // already seen + ord = -1 - ord; + } + + return ord; } }; - filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, subAggregators.length, context); + filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, context); } @Override @@ -557,7 +567,12 @@ private void processLeafFromQuery(LeafReaderContext ctx, Sort indexSortPrefix) t @Override protected LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCollector sub) throws IOException { - boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); + boolean optimized = filterRewriteOptimizationContext.tryOptimize( + ctx, + this::incrementBucketDocCount, + sub, + segmentMatchAll(context, ctx) + ); if (optimized) throw new CollectionTerminatedException(); finishLeaf(); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java index 6b34582b259ea..c97849d83d2b9 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/AggregatorBridge.java @@ -11,11 +11,14 @@ import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.PointValues; import org.opensearch.index.mapper.MappedFieldType; +import org.opensearch.search.aggregations.LeafBucketCollector; import java.io.IOException; import java.util.function.BiConsumer; import java.util.function.Consumer; +import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse; + /** * This interface provides a bridge between an aggregator and the optimization context, allowing * the aggregator to provide data and optimize the aggregation process. @@ -37,9 +40,9 @@ public abstract class AggregatorBridge { */ MappedFieldType fieldType; - Consumer setRanges; + Consumer setRanges; - void setRangesConsumer(Consumer setRanges) { + void setRangesConsumer(Consumer setRanges) { this.setRanges = setRanges; } @@ -67,18 +70,66 @@ void setRangesConsumer(Consumer setRanges) { * * @param leaf the leaf reader context for the segment */ - abstract Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException; + abstract PackedValueRanges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException; + + /** + * @return max range to stop collecting at. + * Utilized by aggs which stop early. + */ + protected int rangeMax() { + return Integer.MAX_VALUE; + } + + /** + * Translate an index of the packed value range array to an agg bucket ordinal. + */ + protected long getOrd(int rangeIdx, PackedValueRanges ranges) { + return rangeIdx; + } /** - * Attempts to build aggregation results for a segment + * Attempts to build aggregation results for a segment. + * With no sub agg count docs and avoid iterating docIds. + * If a sub agg is present we must iterate through and collect docIds to support it. * - * @param values the point values (index structure for numeric values) for a segment - * @param incrementDocCount a consumer to increment the document count for a range bucket. The First parameter is document count, the second is the key of the bucket - * @param ranges + * @param values the point values (index structure for numeric values) for a segment + * @param incrementDocCount a consumer to increment the document count for a range bucket. The First parameter is document count, the second is the key of the bucket */ - abstract FilterRewriteOptimizationContext.DebugInfo tryOptimize( + public final FilterRewriteOptimizationContext.DebugInfo tryOptimize( PointValues values, BiConsumer incrementDocCount, - Ranges ranges - ) throws IOException; + PackedValueRanges ranges, + final LeafBucketCollector sub + ) throws IOException { + PointTreeTraversal.RangeAwareIntersectVisitor treeVisitor; + + if (sub != null) { + treeVisitor = new PointTreeTraversal.DocCollectRangeAwareIntersectVisitor( + values.getPointTree(), + ranges, + rangeMax(), + (activeIndex, docID) -> { + long ord = this.getOrd(activeIndex, ranges); + try { + incrementDocCount.accept(ord, (long) 1); + sub.collect(docID, ord); + } catch (IOException ioe) { + throw new RuntimeException(ioe); + } + } + ); + } else { + treeVisitor = new PointTreeTraversal.DocCountRangeAwareIntersectVisitor( + values.getPointTree(), + ranges, + rangeMax(), + (activeIndex, docCount) -> { + long ord = this.getOrd(activeIndex, ranges); + incrementDocCount.accept(ord, (long) docCount); + } + ); + } + + return multiRangesTraverse(treeVisitor); + } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java index 8bff3fdc5fefb..59c96f644d1e8 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/DateHistogramAggregatorBridge.java @@ -8,9 +8,7 @@ package org.opensearch.search.aggregations.bucket.filterrewrite; -import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PointValues; import org.apache.lucene.search.ScoreMode; import org.apache.lucene.search.Weight; import org.opensearch.common.Rounding; @@ -22,10 +20,6 @@ import java.io.IOException; import java.util.OptionalLong; -import java.util.function.BiConsumer; -import java.util.function.Function; - -import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse; /** * For date histogram aggregation @@ -54,12 +48,12 @@ protected void buildRanges(SearchContext context) throws IOException { } @Override - final Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException { + final PackedValueRanges tryBuildRangesFromSegment(LeafReaderContext leaf) throws IOException { long[] bounds = Helper.getSegmentBounds(leaf, fieldType.name()); return buildRanges(bounds, maxRewriteFilters); } - private Ranges buildRanges(long[] bounds, int maxRewriteFilters) { + private PackedValueRanges buildRanges(long[] bounds, int maxRewriteFilters) { bounds = processHardBounds(bounds); if (bounds == null) { return null; @@ -116,47 +110,11 @@ protected long[] processHardBounds(long[] bounds, LongBounds hardBounds) { return bounds; } - private DateFieldMapper.DateFieldType getFieldType() { + public DateFieldMapper.DateFieldType getFieldType() { assert fieldType instanceof DateFieldMapper.DateFieldType; return (DateFieldMapper.DateFieldType) fieldType; } - protected int getSize() { - return Integer.MAX_VALUE; - } - - @Override - final FilterRewriteOptimizationContext.DebugInfo tryOptimize( - PointValues values, - BiConsumer incrementDocCount, - Ranges ranges - ) throws IOException { - int size = getSize(); - - DateFieldMapper.DateFieldType fieldType = getFieldType(); - BiConsumer incrementFunc = (activeIndex, docCount) -> { - long rangeStart = LongPoint.decodeDimension(ranges.lowers[activeIndex], 0); - rangeStart = fieldType.convertNanosToMillis(rangeStart); - long bucketOrd = getBucketOrd(bucketOrdProducer().apply(rangeStart)); - incrementDocCount.accept(bucketOrd, (long) docCount); - }; - - return multiRangesTraverse(values.getPointTree(), ranges, incrementFunc, size); - } - - private static long getBucketOrd(long bucketOrd) { - if (bucketOrd < 0) { // already seen - bucketOrd = -1 - bucketOrd; - } - - return bucketOrd; - } - - /** - * Provides a function to produce bucket ordinals from the lower bound of the range - */ - protected abstract Function bucketOrdProducer(); - /** * Checks whether the top level query matches all documents on the segment * diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/FilterRewriteOptimizationContext.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/FilterRewriteOptimizationContext.java index 87faafe4526de..8d4b1ef86639b 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/FilterRewriteOptimizationContext.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/FilterRewriteOptimizationContext.java @@ -15,6 +15,7 @@ import org.apache.lucene.index.NumericDocValues; import org.apache.lucene.index.PointValues; import org.opensearch.index.mapper.DocCountFieldMapper; +import org.opensearch.search.aggregations.LeafBucketCollector; import org.opensearch.search.internal.SearchContext; import java.io.IOException; @@ -40,7 +41,7 @@ public final class FilterRewriteOptimizationContext { private final AggregatorBridge aggregatorBridge; private String shardId; - private Ranges ranges; // built at shard level + private PackedValueRanges ranges; // built at shard level // debug info related fields private final AtomicInteger leafNodeVisited = new AtomicInteger(); @@ -48,28 +49,26 @@ public final class FilterRewriteOptimizationContext { private final AtomicInteger segments = new AtomicInteger(); private final AtomicInteger optimizedSegments = new AtomicInteger(); - public FilterRewriteOptimizationContext( - AggregatorBridge aggregatorBridge, - final Object parent, - final int subAggLength, - SearchContext context - ) throws IOException { + public FilterRewriteOptimizationContext(AggregatorBridge aggregatorBridge, final Object parent, SearchContext context) + throws IOException { this.aggregatorBridge = aggregatorBridge; - this.canOptimize = this.canOptimize(parent, subAggLength, context); + this.canOptimize = this.canOptimize(parent, context); } /** * common logic for checking whether the optimization can be applied and prepare at shard level * if the aggregation has any special logic, it should be done using {@link AggregatorBridge} */ - private boolean canOptimize(final Object parent, final int subAggLength, SearchContext context) throws IOException { + private boolean canOptimize(final Object parent, SearchContext context) throws IOException { if (context.maxAggRewriteFilters() == 0) return false; - if (parent != null || subAggLength != 0) return false; + if (parent != null) return false; boolean canOptimize = aggregatorBridge.canOptimize(); if (canOptimize) { - aggregatorBridge.setRangesConsumer(this::setRanges); + aggregatorBridge.setRangesConsumer((PackedValueRanges ranges) -> { + this.ranges = ranges; + }); this.shardId = context.indexShard().shardId().toString(); @@ -84,10 +83,6 @@ private boolean canOptimize(final Object parent, final int subAggLength, SearchC return canOptimize; } - void setRanges(Ranges ranges) { - this.ranges = ranges; - } - /** * Try to populate the bucket doc counts for aggregation *

@@ -96,8 +91,12 @@ void setRanges(Ranges ranges) { * @param incrementDocCount consume the doc_count results for certain ordinal * @param segmentMatchAll if your optimization can prepareFromSegment, you should pass in this flag to decide whether to prepareFromSegment */ - public boolean tryOptimize(final LeafReaderContext leafCtx, final BiConsumer incrementDocCount, boolean segmentMatchAll) - throws IOException { + public boolean tryOptimize( + final LeafReaderContext leafCtx, + final BiConsumer incrementDocCount, + LeafBucketCollector sub, + boolean segmentMatchAll + ) throws IOException { segments.incrementAndGet(); if (!canOptimize) { return false; @@ -120,10 +119,10 @@ public boolean tryOptimize(final LeafReaderContext leafCtx, final BiConsumer= 0; + } + + public static boolean withinUpperBound(byte[] value, byte[] upperBound) { + return compareByteValue(value, upperBound) < 0; + } + + public byte[] getLower(int idx) { + return lowers[idx]; + } + + public byte[] getUpper(int idx) { + return uppers[idx]; + } + + public boolean withinLowerBound(byte[] value, int idx) { + return PackedValueRanges.withinLowerBound(value, lowers[idx]); + } + + public boolean withinUpperBound(byte[] value, int idx) { + return PackedValueRanges.withinUpperBound(value, uppers[idx]); + } + + public boolean withinRange(byte[] value, int idx) { + return withinLowerBound(value, idx) && withinUpperBound(value, idx); + } + public int firstRangeIndex(byte[] globalMin, byte[] globalMax) { if (compareByteValue(lowers[0], globalMax) > 0) { return -1; @@ -42,16 +75,4 @@ public int firstRangeIndex(byte[] globalMin, byte[] globalMax) { } return i; } - - public static int compareByteValue(byte[] value1, byte[] value2) { - return comparator.compare(value1, 0, value2, 0); - } - - public static boolean withinLowerBound(byte[] value, byte[] lowerBound) { - return compareByteValue(value, lowerBound) >= 0; - } - - public static boolean withinUpperBound(byte[] value, byte[] upperBound) { - return compareByteValue(value, upperBound) < 0; - } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/PointTreeTraversal.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/PointTreeTraversal.java index 581ecc416f486..331893ea8af49 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/PointTreeTraversal.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/PointTreeTraversal.java @@ -13,7 +13,6 @@ import org.apache.lucene.index.PointValues; import org.apache.lucene.search.CollectionTerminatedException; import org.apache.lucene.search.DocIdSetIterator; -import org.opensearch.common.CheckedRunnable; import java.io.IOException; import java.util.function.BiConsumer; @@ -23,201 +22,240 @@ /** * Utility class for traversing a {@link PointValues.PointTree} and collecting document counts for the ranges. * - *

The main entry point is the {@link #multiRangesTraverse(PointValues.PointTree, Ranges, - * BiConsumer, int)} method + *

The main entry point is the {@link #multiRangesTraverse(RangeAwareIntersectVisitor)} method * - *

The class uses a {@link RangeCollectorForPointTree} to keep track of the active ranges and - * determine which parts of the tree to visit. The {@link - * PointValues.IntersectVisitor} implementation is responsible for the actual visitation and - * document count collection. + *

The class uses a {@link RangeAwareIntersectVisitor} to keep track of the active ranges, traverse the tree, and + * consume documents. */ -final class PointTreeTraversal { - private PointTreeTraversal() {} - +public final class PointTreeTraversal { private static final Logger logger = LogManager.getLogger(Helper.loggerName); /** - * Traverses the given {@link PointValues.PointTree} and collects document counts for the intersecting ranges. - * - * @param tree the point tree to traverse - * @param ranges the set of ranges to intersect with - * @param incrementDocCount a callback to increment the document count for a range bucket - * @param maxNumNonZeroRanges the maximum number of non-zero ranges to collect - * @return a {@link FilterRewriteOptimizationContext.DebugInfo} object containing debug information about the traversal + * Traverse the RangeAwareIntersectVisitor PointTree. + * Collects and returns DebugInfo from traversal + * @param visitor the maximum number of non-zero ranges to collect + * @return a {@link FilterRewriteOptimizationContext.DebugInfo} object containing debug information about the traversal */ - static FilterRewriteOptimizationContext.DebugInfo multiRangesTraverse( - final PointValues.PointTree tree, - final Ranges ranges, - final BiConsumer incrementDocCount, - final int maxNumNonZeroRanges - ) throws IOException { + public static FilterRewriteOptimizationContext.DebugInfo multiRangesTraverse(RangeAwareIntersectVisitor visitor) throws IOException { FilterRewriteOptimizationContext.DebugInfo debugInfo = new FilterRewriteOptimizationContext.DebugInfo(); - int activeIndex = ranges.firstRangeIndex(tree.getMinPackedValue(), tree.getMaxPackedValue()); - if (activeIndex < 0) { + + if (visitor.getActiveIndex() < 0) { logger.debug("No ranges match the query, skip the fast filter optimization"); return debugInfo; } - RangeCollectorForPointTree collector = new RangeCollectorForPointTree(incrementDocCount, maxNumNonZeroRanges, ranges, activeIndex); - PointValues.IntersectVisitor visitor = getIntersectVisitor(collector); + try { - intersectWithRanges(visitor, tree, collector, debugInfo); + visitor.traverse(debugInfo); } catch (CollectionTerminatedException e) { logger.debug("Early terminate since no more range to collect"); } - collector.finalizePreviousRange(); return debugInfo; } - private static void intersectWithRanges( - PointValues.IntersectVisitor visitor, - PointValues.PointTree pointTree, - RangeCollectorForPointTree collector, - FilterRewriteOptimizationContext.DebugInfo debug - ) throws IOException { - PointValues.Relation r = visitor.compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); - - switch (r) { - case CELL_INSIDE_QUERY: - collector.countNode((int) pointTree.size()); - debug.visitInner(); - break; - case CELL_CROSSES_QUERY: - if (pointTree.moveToChild()) { - do { - intersectWithRanges(visitor, pointTree, collector, debug); - } while (pointTree.moveToSibling()); - pointTree.moveToParent(); - } else { - pointTree.visitDocValues(visitor); - debug.visitLeaf(); - } - break; - case CELL_OUTSIDE_QUERY: + /** + * This IntersectVisitor contains a packed value representation of Ranges + * as well as the current activeIndex being considered for collection. + */ + public static abstract class RangeAwareIntersectVisitor implements PointValues.IntersectVisitor { + private final PointValues.PointTree pointTree; + private final PackedValueRanges packedValueRanges; + private final int maxNumNonZeroRange; + protected int visitedRange = 0; + protected int activeIndex; + + public RangeAwareIntersectVisitor(PointValues.PointTree pointTree, PackedValueRanges packedValueRanges, int maxNumNonZeroRange) { + this.packedValueRanges = packedValueRanges; + this.pointTree = pointTree; + this.maxNumNonZeroRange = maxNumNonZeroRange; + this.activeIndex = packedValueRanges.firstRangeIndex(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); } - } - private static PointValues.IntersectVisitor getIntersectVisitor(RangeCollectorForPointTree collector) { - return new PointValues.IntersectVisitor() { - @Override - public void visit(int docID) { - // this branch should be unreachable - throw new UnsupportedOperationException( - "This IntersectVisitor does not perform any actions on a " + "docID=" + docID + " node being visited" - ); + public long getActiveIndex() { + return activeIndex; + } + + public abstract void visit(int docID); + + public abstract void visit(int docID, byte[] packedValue); + + public abstract void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException; + + protected abstract void consumeContainedNode(PointValues.PointTree pointTree) throws IOException; + + protected abstract void consumeCrossedNode(PointValues.PointTree pointTree) throws IOException; + + public void traverse(FilterRewriteOptimizationContext.DebugInfo debug) throws IOException { + PointValues.Relation r = compare(pointTree.getMinPackedValue(), pointTree.getMaxPackedValue()); + switch (r) { + case CELL_INSIDE_QUERY: + consumeContainedNode(pointTree); + debug.visitInner(); + break; + case CELL_CROSSES_QUERY: + if (pointTree.moveToChild()) { + do { + traverse(debug); + } while (pointTree.moveToSibling()); + pointTree.moveToParent(); + } else { + consumeCrossedNode(pointTree); + debug.visitLeaf(); + } + break; + case CELL_OUTSIDE_QUERY: } + } - @Override - public void visit(int docID, byte[] packedValue) throws IOException { - visitPoints(packedValue, collector::count); + /** + * increment activeIndex until we run out of ranges or find a valid range that contains maxPackedValue + * else throw CollectionTerminatedException if we run out of ranges to check + * @param minPackedValue lower bound of PointValues.PointTree node + * @param maxPackedValue upper bound of PointValues.PointTree node + * @return the min/max values of the PointValues.PointTree node can be one of: + * 1.) Completely outside the activeIndex range + * 2.) Completely inside the activeIndex range + * 3.) Overlapping with the activeIndex range + */ + @Override + public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { + // try to find the first range that may collect values from this cell + if (!packedValueRanges.withinUpperBound(minPackedValue, activeIndex) && iterateRangeEnd(minPackedValue)) { + throw new CollectionTerminatedException(); } - @Override - public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { - visitPoints(packedValue, () -> { - for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) { - collector.count(); - } - }); + // after the loop, min < upper + // cell could be outside [min max] lower + if (!packedValueRanges.withinLowerBound(maxPackedValue, activeIndex) && iterateRangeEnd(maxPackedValue)) { + return PointValues.Relation.CELL_OUTSIDE_QUERY; } - private void visitPoints(byte[] packedValue, CheckedRunnable collect) throws IOException { - if (!collector.withinUpperBound(packedValue)) { - collector.finalizePreviousRange(); - if (collector.iterateRangeEnd(packedValue)) { - throw new CollectionTerminatedException(); - } - } + if (packedValueRanges.withinRange(minPackedValue, activeIndex) && packedValueRanges.withinRange(maxPackedValue, activeIndex)) { + return PointValues.Relation.CELL_INSIDE_QUERY; + } + return PointValues.Relation.CELL_CROSSES_QUERY; + } - if (collector.withinRange(packedValue)) { - collect.run(); - } + /** + * throws CollectionTerminatedException if we have reached our last range, and it does not contain packedValue + * @param packedValue determine if packedValue falls within the range at activeIndex + * @return true when packedValue falls within the activeIndex range + */ + protected boolean canCollect(byte[] packedValue) { + if (!packedValueRanges.withinUpperBound(packedValue, activeIndex) && iterateRangeEnd(packedValue)) { + throw new CollectionTerminatedException(); } + return packedValueRanges.withinRange(packedValue, activeIndex); + } - @Override - public PointValues.Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { - // try to find the first range that may collect values from this cell - if (!collector.withinUpperBound(minPackedValue)) { - collector.finalizePreviousRange(); - if (collector.iterateRangeEnd(minPackedValue)) { - throw new CollectionTerminatedException(); - } - } - // after the loop, min < upper - // cell could be outside [min max] lower - if (!collector.withinLowerBound(maxPackedValue)) { - return PointValues.Relation.CELL_OUTSIDE_QUERY; - } - if (collector.withinRange(minPackedValue) && collector.withinRange(maxPackedValue)) { - return PointValues.Relation.CELL_INSIDE_QUERY; + /** + * @param packedValue increment active index until we reach a range containing value + * @return true when we've exhausted all available ranges or visited maxNumNonZeroRange and can stop early + */ + protected boolean iterateRangeEnd(byte[] packedValue) { + // the new value may not be contiguous to the previous one + // so try to find the first next range that cross the new value + while (!packedValueRanges.withinUpperBound(packedValue, activeIndex)) { + if (++activeIndex >= packedValueRanges.size) { + return true; } - return PointValues.Relation.CELL_CROSSES_QUERY; } - }; + visitedRange++; + return visitedRange > maxNumNonZeroRange; + } } - private static class RangeCollectorForPointTree { - private final BiConsumer incrementRangeDocCount; - private int counter = 0; - - private final Ranges ranges; - private int activeIndex; - - private int visitedRange = 0; - private final int maxNumNonZeroRange; + /** + * Traverse PointTree with countDocs callback where countDock inputs are + * 1.) activeIndex for range in which document(s) reside + * 2.) total documents counted + */ + public static class DocCountRangeAwareIntersectVisitor extends RangeAwareIntersectVisitor { + BiConsumer countDocs; - public RangeCollectorForPointTree( - BiConsumer incrementRangeDocCount, + public DocCountRangeAwareIntersectVisitor( + PointValues.PointTree pointTree, + PackedValueRanges packedValueRanges, int maxNumNonZeroRange, - Ranges ranges, - int activeIndex + BiConsumer countDocs ) { - this.incrementRangeDocCount = incrementRangeDocCount; - this.maxNumNonZeroRange = maxNumNonZeroRange; - this.ranges = ranges; - this.activeIndex = activeIndex; + super(pointTree, packedValueRanges, maxNumNonZeroRange); + this.countDocs = countDocs; } - private void count() { - counter++; + @Override + public void visit(int docID) { + countDocs.accept(activeIndex, 1); } - private void countNode(int count) { - counter += count; + @Override + public void visit(int docID, byte[] packedValue) { + if (canCollect(packedValue)) { + countDocs.accept(activeIndex, 1); + } } - private void finalizePreviousRange() { - if (counter > 0) { - incrementRangeDocCount.accept(activeIndex, counter); - counter = 0; + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (canCollect(packedValue)) { + for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) { + countDocs.accept(activeIndex, 1); + } } } - /** - * @return true when iterator exhausted or collect enough non-zero ranges - */ - private boolean iterateRangeEnd(byte[] value) { - // the new value may not be contiguous to the previous one - // so try to find the first next range that cross the new value - while (!withinUpperBound(value)) { - if (++activeIndex >= ranges.size) { - return true; - } + protected void consumeContainedNode(PointValues.PointTree pointTree) throws IOException { + countDocs.accept(activeIndex, (int) pointTree.size()); + } + + protected void consumeCrossedNode(PointValues.PointTree pointTree) throws IOException { + pointTree.visitDocValues(this); + } + } + + /** + * Traverse PointTree with collectDocs callback where collectDocs inputs are + * 1.) activeIndex for range in which document(s) reside + * 2.) document id to collect + */ + public static class DocCollectRangeAwareIntersectVisitor extends RangeAwareIntersectVisitor { + BiConsumer collectDocs; + + public DocCollectRangeAwareIntersectVisitor( + PointValues.PointTree pointTree, + PackedValueRanges packedValueRanges, + int maxNumNonZeroRange, + BiConsumer collectDocs + ) { + super(pointTree, packedValueRanges, maxNumNonZeroRange); + this.collectDocs = collectDocs; + } + + @Override + public void visit(int docID) { + collectDocs.accept(activeIndex, docID); + } + + @Override + public void visit(int docID, byte[] packedValue) { + if (canCollect(packedValue)) { + collectDocs.accept(activeIndex, docID); } - visitedRange++; - return visitedRange > maxNumNonZeroRange; } - private boolean withinLowerBound(byte[] value) { - return Ranges.withinLowerBound(value, ranges.lowers[activeIndex]); + public void visit(DocIdSetIterator iterator, byte[] packedValue) throws IOException { + if (canCollect(packedValue)) { + for (int doc = iterator.nextDoc(); doc != NO_MORE_DOCS; doc = iterator.nextDoc()) { + collectDocs.accept(activeIndex, iterator.docID()); + } + } } - private boolean withinUpperBound(byte[] value) { - return Ranges.withinUpperBound(value, ranges.uppers[activeIndex]); + protected void consumeContainedNode(PointValues.PointTree pointTree) throws IOException { + pointTree.visitDocIDs(this); } - private boolean withinRange(byte[] value) { - return withinLowerBound(value) && withinUpperBound(value); + protected void consumeCrossedNode(PointValues.PointTree pointTree) throws IOException { + pointTree.visitDocValues(this); } } } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java index b590a444c8b04..77ef6b231755b 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/filterrewrite/RangeAggregatorBridge.java @@ -9,19 +9,12 @@ package org.opensearch.search.aggregations.bucket.filterrewrite; import org.apache.lucene.index.LeafReaderContext; -import org.apache.lucene.index.PointValues; import org.opensearch.index.mapper.MappedFieldType; import org.opensearch.index.mapper.NumericPointEncoder; import org.opensearch.search.aggregations.bucket.range.RangeAggregator; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; -import java.io.IOException; -import java.util.function.BiConsumer; -import java.util.function.Function; - -import static org.opensearch.search.aggregations.bucket.filterrewrite.PointTreeTraversal.multiRangesTraverse; - /** * For range aggregation */ @@ -65,32 +58,11 @@ protected void buildRanges(RangeAggregator.Range[] ranges) { uppers[i] = upper; } - setRanges.accept(new Ranges(lowers, uppers)); + setRanges.accept(new PackedValueRanges(lowers, uppers)); } @Override - final Ranges tryBuildRangesFromSegment(LeafReaderContext leaf) { + final PackedValueRanges tryBuildRangesFromSegment(LeafReaderContext leaf) { throw new UnsupportedOperationException("Range aggregation should not build ranges at segment level"); } - - @Override - final FilterRewriteOptimizationContext.DebugInfo tryOptimize( - PointValues values, - BiConsumer incrementDocCount, - Ranges ranges - ) throws IOException { - int size = Integer.MAX_VALUE; - - BiConsumer incrementFunc = (activeIndex, docCount) -> { - long bucketOrd = bucketOrdProducer().apply(activeIndex); - incrementDocCount.accept(bucketOrd, (long) docCount); - }; - - return multiRangesTraverse(values.getPointTree(), ranges, incrementFunc, size); - } - - /** - * Provides a function to produce bucket ordinals from index of the corresponding range in the range array - */ - protected abstract Function bucketOrdProducer(); } diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java index f3a36b4882d19..b4402575c37a7 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/AutoDateHistogramAggregator.java @@ -31,6 +31,7 @@ package org.opensearch.search.aggregations.bucket.histogram; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.CollectionTerminatedException; @@ -57,6 +58,7 @@ import org.opensearch.search.aggregations.bucket.filterrewrite.FilterRewriteOptimizationContext; import org.opensearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder.RoundingInfo; import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; +import org.opensearch.search.aggregations.bucket.filterrewrite.PackedValueRanges; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; import org.opensearch.search.internal.SearchContext; @@ -200,11 +202,20 @@ protected Prepared getRoundingPrepared() { } @Override - protected Function bucketOrdProducer() { - return (key) -> getBucketOrds().add(0, preparedRounding.round((long) key)); + protected long getOrd(int rangeIdx, PackedValueRanges ranges) { + assert(ranges != null); + long rangeStart = LongPoint.decodeDimension(ranges.getLower(rangeIdx), 0); + rangeStart = this.getFieldType().convertNanosToMillis(rangeStart); + long ord = getBucketOrds().add(0, getRoundingPrepared().round(rangeStart)); + + if (ord < 0) { // already seen + ord = -1 - ord; + } + + return ord; } }; - filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, subAggregators.length, context); + filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, context); } protected abstract LongKeyedBucketOrds getBucketOrds(); @@ -236,7 +247,12 @@ public final LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBuc return LeafBucketCollector.NO_OP_COLLECTOR; } - boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); + boolean optimized = filterRewriteOptimizationContext.tryOptimize( + ctx, + this::incrementBucketDocCount, + sub, + segmentMatchAll(context, ctx) + ); if (optimized) throw new CollectionTerminatedException(); final SortedNumericDocValues values = valuesSource.longValues(ctx); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java index 96a49bc3fd5f6..97a193fd62385 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/histogram/DateHistogramAggregator.java @@ -31,6 +31,7 @@ package org.opensearch.search.aggregations.bucket.histogram; +import org.apache.lucene.document.LongPoint; import org.apache.lucene.index.LeafReaderContext; import org.apache.lucene.index.SortedNumericDocValues; import org.apache.lucene.search.CollectionTerminatedException; @@ -50,6 +51,7 @@ import org.opensearch.search.aggregations.bucket.BucketsAggregator; import org.opensearch.search.aggregations.bucket.filterrewrite.DateHistogramAggregatorBridge; import org.opensearch.search.aggregations.bucket.filterrewrite.FilterRewriteOptimizationContext; +import org.opensearch.search.aggregations.bucket.filterrewrite.PackedValueRanges; import org.opensearch.search.aggregations.bucket.terms.LongKeyedBucketOrds; import org.opensearch.search.aggregations.support.ValuesSource; import org.opensearch.search.aggregations.support.ValuesSourceConfig; @@ -59,7 +61,6 @@ import java.util.Collections; import java.util.Map; import java.util.function.BiConsumer; -import java.util.function.Function; import static org.opensearch.search.aggregations.bucket.filterrewrite.DateHistogramAggregatorBridge.segmentMatchAll; @@ -146,11 +147,20 @@ protected long[] processHardBounds(long[] bounds) { } @Override - protected Function bucketOrdProducer() { - return (key) -> bucketOrds.add(0, preparedRounding.round((long) key)); + protected long getOrd(int rangeIdx, PackedValueRanges ranges) { + assert(ranges != null); + long rangeStart = LongPoint.decodeDimension(ranges.getLower(rangeIdx), 0); + rangeStart = this.getFieldType().convertNanosToMillis(rangeStart); + long ord = bucketOrds.add(0, getRoundingPrepared().round(rangeStart)); + + if (ord < 0) { // already seen + ord = -1 - ord; + } + + return ord; } }; - filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, subAggregators.length, context); + filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, context); } @Override @@ -167,7 +177,12 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, LeafBucketCol return LeafBucketCollector.NO_OP_COLLECTOR; } - boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, segmentMatchAll(context, ctx)); + boolean optimized = filterRewriteOptimizationContext.tryOptimize( + ctx, + this::incrementBucketDocCount, + sub, + segmentMatchAll(context, ctx) + ); if (optimized) throw new CollectionTerminatedException(); SortedNumericDocValues values = valuesSource.longValues(ctx); diff --git a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java index 17461f228e993..5a052fcd8ecb1 100644 --- a/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java +++ b/server/src/main/java/org/opensearch/search/aggregations/bucket/range/RangeAggregator.java @@ -67,7 +67,6 @@ import java.util.Map; import java.util.Objects; import java.util.function.BiConsumer; -import java.util.function.Function; import static org.opensearch.core.xcontent.ConstructingObjectParser.optionalConstructorArg; @@ -291,13 +290,8 @@ protected boolean canOptimize() { protected void prepare() { buildRanges(ranges); } - - @Override - protected Function bucketOrdProducer() { - return (activeIndex) -> subBucketOrdinal(0, (int) activeIndex); - } }; - filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, subAggregators.length, context); + filterRewriteOptimizationContext = new FilterRewriteOptimizationContext(bridge, parent, context); } @Override @@ -310,7 +304,7 @@ public ScoreMode scoreMode() { @Override public LeafBucketCollector getLeafCollector(LeafReaderContext ctx, final LeafBucketCollector sub) throws IOException { - boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, false); + boolean optimized = filterRewriteOptimizationContext.tryOptimize(ctx, this::incrementBucketDocCount, sub, false); if (optimized) throw new CollectionTerminatedException(); final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);