Skip to content

Commit

Permalink
Add histogram field type support to boxplot aggs
Browse files Browse the repository at this point in the history
Add support for the histogram field type to boxplot aggs.

Closes elastic#52233
Relates to elastic#33112
  • Loading branch information
imotov committed Feb 12, 2020
1 parent 6b62ec5 commit a428f17
Show file tree
Hide file tree
Showing 6 changed files with 73 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
=== Boxplot Aggregation

A `boxplot` metrics aggregation that computes boxplot of numeric values extracted from the aggregated documents.
These values can be extracted either from specific numeric fields in the documents, or be generated by a provided script.
These values can be generated by a provided script or extracted from specific numeric or
<<histogram,histogram fields>> in the documents.

The `boxplot` aggregation returns essential information for making a https://en.wikipedia.org/wiki/Box_plot[box plot]: minimum, maximum
median, first quartile (25th percentile) and third quartile (75th percentile) values.
Expand Down
1 change: 1 addition & 0 deletions docs/reference/mapping/types/histogram.asciidoc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ following aggregations and queries:

* <<search-aggregations-metrics-percentile-aggregation,percentiles>> aggregation
* <<search-aggregations-metrics-percentile-rank-aggregation,percentile ranks>> aggregation
* <<search-aggregations-metrics-boxplot-aggregation,boxplot>> aggregation
* <<query-dsl-exists-query,exists>> query

[[mapping-types-histogram-building-histogram]]
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,15 +29,15 @@

import static org.elasticsearch.search.aggregations.metrics.PercentilesMethod.COMPRESSION_FIELD;

public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource.Numeric,
public class BoxplotAggregationBuilder extends ValuesSourceAggregationBuilder.LeafOnly<ValuesSource,
BoxplotAggregationBuilder> {
public static final String NAME = "boxplot";

private static final ObjectParser<BoxplotAggregationBuilder, Void> PARSER;

static {
PARSER = new ObjectParser<>(BoxplotAggregationBuilder.NAME);
ValuesSourceParserHelper.declareNumericFields(PARSER, true, true, false);
ValuesSourceParserHelper.declareAnyFields(PARSER, true, true);
PARSER.declareDouble(BoxplotAggregationBuilder::compression, COMPRESSION_FIELD);
}

Expand Down Expand Up @@ -98,7 +98,7 @@ public double compression() {

@Override
protected BoxplotAggregatorFactory innerBuild(QueryShardContext queryShardContext,
ValuesSourceConfig<ValuesSource.Numeric> config,
ValuesSourceConfig<ValuesSource> config,
AggregatorFactory parent,
AggregatorFactories.Builder subFactoriesBuilder) throws IOException {
return new BoxplotAggregatorFactory(name, config, compression, queryShardContext, parent, subFactoriesBuilder, metaData);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
import org.elasticsearch.common.lease.Releasables;
import org.elasticsearch.common.util.BigArrays;
import org.elasticsearch.common.util.ObjectArray;
import org.elasticsearch.index.fielddata.HistogramValue;
import org.elasticsearch.index.fielddata.HistogramValues;
import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
import org.elasticsearch.search.DocValueFormat;
import org.elasticsearch.search.aggregations.Aggregator;
Expand All @@ -29,12 +31,12 @@

public class BoxplotAggregator extends NumericMetricsAggregator.MultiValue {

private final ValuesSource.Numeric valuesSource;
private final ValuesSource valuesSource;
private final DocValueFormat format;
protected ObjectArray<TDigestState> states;
protected final double compression;

BoxplotAggregator(String name, ValuesSource.Numeric valuesSource, DocValueFormat formatter, double compression,
BoxplotAggregator(String name, ValuesSource valuesSource, DocValueFormat formatter, double compression,
SearchContext context, Aggregator parent, List<PipelineAggregator> pipelineAggregators,
Map<String, Object> metaData) throws IOException {
super(name, context, parent, pipelineAggregators, metaData);
Expand All @@ -58,23 +60,38 @@ public LeafBucketCollector getLeafCollector(LeafReaderContext ctx,
return LeafBucketCollector.NO_OP_COLLECTOR;
}
final BigArrays bigArrays = context.bigArrays();
final SortedNumericDoubleValues values = valuesSource.doubleValues(ctx);
return new LeafBucketCollectorBase(sub, values) {
@Override
public void collect(int doc, long bucket) throws IOException {
states = bigArrays.grow(states, bucket + 1);

if (values.advanceExact(doc)) {
if (valuesSource instanceof ValuesSource.Histogram) {
final HistogramValues values = ((ValuesSource.Histogram)valuesSource).getHistogramValues(ctx);
return new LeafBucketCollectorBase(sub, values) {
@Override
public void collect(int doc, long bucket) throws IOException {
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
if (values.advanceExact(doc)) {
final int valueCount = values.docValueCount();
for (int i = 0; i < valueCount; i++) {
state.add(values.nextValue());
final HistogramValue sketch = values.histogram();
while(sketch.next()) {
state.add(sketch.value(), sketch.count());
}
}
}
}
};
};
} else {
final SortedNumericDoubleValues values = ((ValuesSource.Numeric)valuesSource).doubleValues(ctx);
return new LeafBucketCollectorBase(sub, values) {
@Override
public void collect(int doc, long bucket) throws IOException {
states = bigArrays.grow(states, bucket + 1);
if (values.advanceExact(doc)) {
TDigestState state = getExistingOrNewHistogram(bigArrays, bucket);
if (values.advanceExact(doc)) {
final int valueCount = values.docValueCount();
for (int i = 0; i < valueCount; i++) {
state.add(values.nextValue());
}
}
}
}
};
}
}

private TDigestState getExistingOrNewHistogram(final BigArrays bigArrays, long bucket) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@
import java.util.List;
import java.util.Map;

public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource.Numeric> {
public class BoxplotAggregatorFactory extends ValuesSourceAggregatorFactory<ValuesSource> {

private final double compression;

BoxplotAggregatorFactory(String name,
ValuesSourceConfig<ValuesSource.Numeric> config,
ValuesSourceConfig<ValuesSource> config,
double compression,
QueryShardContext queryShardContext,
AggregatorFactory parent,
Expand All @@ -46,7 +46,7 @@ protected Aggregator createUnmapped(SearchContext searchContext,
}

@Override
protected Aggregator doCreateInternal(ValuesSource.Numeric valuesSource,
protected Aggregator doCreateInternal(ValuesSource valuesSource,
SearchContext searchContext,
Aggregator parent,
boolean collectsFromSingleBucket,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
import org.elasticsearch.search.aggregations.metrics.TDigestState;
import org.elasticsearch.test.ESSingleNodeTestCase;
import org.elasticsearch.xpack.analytics.AnalyticsPlugin;
import org.elasticsearch.xpack.analytics.boxplot.Boxplot;
import org.elasticsearch.xpack.analytics.boxplot.BoxplotAggregationBuilder;
import org.elasticsearch.xpack.core.LocalStateCompositeXPackPlugin;

import java.util.ArrayList;
Expand Down Expand Up @@ -131,8 +133,7 @@ public void testHDRHistogram() throws Exception {
}
}

public void testTDigestHistogram() throws Exception {

private void setupTDigestHistogram(int compression) throws Exception {
XContentBuilder xContentBuilder = XContentFactory.jsonBuilder()
.startObject()
.startObject("_doc")
Expand Down Expand Up @@ -170,8 +171,6 @@ public void testTDigestHistogram() throws Exception {
PutMappingRequest request2 = new PutMappingRequest("pre_agg").source(xContentBuilder2);
client().admin().indices().putMapping(request2).actionGet();


int compression = TestUtil.nextInt(random(), 200, 300);
TDigestState histogram = new TDigestState(compression);
BulkRequest bulkRequest = new BulkRequest();

Expand Down Expand Up @@ -218,6 +217,11 @@ public void testTDigestHistogram() throws Exception {

response = client().prepareSearch("pre_agg").get();
assertEquals(numDocs / frq, response.getHits().getTotalHits().value);
}

public void testTDigestHistogram() throws Exception {
int compression = TestUtil.nextInt(random(), 200, 300);
setupTDigestHistogram(compression);

PercentilesAggregationBuilder builder =
AggregationBuilders.percentiles("agg").field("inner.data").method(PercentilesMethod.TDIGEST)
Expand All @@ -236,6 +240,31 @@ public void testTDigestHistogram() throws Exception {
}
}

public void testBoxplotHistogram() throws Exception {
int compression = TestUtil.nextInt(random(), 200, 300);
setupTDigestHistogram(compression);
BoxplotAggregationBuilder bpBuilder = new BoxplotAggregationBuilder("agg").field("inner.data").compression(compression);

SearchResponse bpResponseRaw = client().prepareSearch("raw").addAggregation(bpBuilder).get();
SearchResponse bpResponsePreAgg = client().prepareSearch("pre_agg").addAggregation(bpBuilder).get();
SearchResponse bpResponseBoth = client().prepareSearch("raw", "pre_agg").addAggregation(bpBuilder).get();

Boxplot bpRaw = bpResponseRaw.getAggregations().get("agg");
Boxplot bpPreAgg = bpResponsePreAgg.getAggregations().get("agg");
Boxplot bpBoth = bpResponseBoth.getAggregations().get("agg");
assertEquals(bpRaw.getMax(), bpPreAgg.getMax(), 0.0);
assertEquals(bpRaw.getMax(), bpBoth.getMax(), 0.0);
assertEquals(bpRaw.getMin(), bpPreAgg.getMin(), 0.0);
assertEquals(bpRaw.getMin(), bpBoth.getMin(), 0.0);

assertEquals(bpRaw.getQ1(), bpPreAgg.getQ1(), 1.0);
assertEquals(bpRaw.getQ1(), bpBoth.getQ1(), 1.0);
assertEquals(bpRaw.getQ2(), bpPreAgg.getQ2(), 1.0);
assertEquals(bpRaw.getQ2(), bpBoth.getQ2(), 1.0);
assertEquals(bpRaw.getQ3(), bpPreAgg.getQ3(), 1.0);
assertEquals(bpRaw.getQ3(), bpBoth.getQ3(), 1.0);
}

@Override
protected Collection<Class<? extends Plugin>> getPlugins() {
List<Class<? extends Plugin>> plugins = new ArrayList<>(super.getPlugins());
Expand Down

0 comments on commit a428f17

Please sign in to comment.