Add moving percentiles pipeline aggregation (#55441)

Similar to what the moving function aggregation does, except merging windows of percentiles sketches together instead of cumulatively merging final metrics
elastic · May 12, 2020 · 4e39184 · 4e39184
1 parent b942aed
commit 4e39184
Show file tree

Hide file tree

Showing 16 changed files with 1,173 additions and 15 deletions.
diff --git a/docs/reference/aggregations/pipeline.asciidoc b/docs/reference/aggregations/pipeline.asciidoc
@@ -286,3 +286,4 @@ include::pipeline/bucket-script-aggregation.asciidoc[]
 include::pipeline/bucket-selector-aggregation.asciidoc[]
 include::pipeline/bucket-sort-aggregation.asciidoc[]
 include::pipeline/serial-diff-aggregation.asciidoc[]
+include::pipeline/moving-percentiles-aggregation.asciidoc[]
diff --git a/docs/reference/aggregations/pipeline/moving-percentiles-aggregation.asciidoc b/docs/reference/aggregations/pipeline/moving-percentiles-aggregation.asciidoc
@@ -0,0 +1,162 @@
+[role="xpack"]
+[testenv="basic"]
+[[search-aggregations-pipeline-moving-percentiles-aggregation]]
+=== Moving Percentiles Aggregation
+
+Given an ordered series of <<search-aggregations-metrics-percentile-aggregation, percentiles>>, the Moving Percentiles aggregation
+will slide a window across those percentiles and allow the user to compute the cumulative percentile.
+
+This is conceptually very similar to the <<search-aggregations-pipeline-movfn-aggregation, Moving Function>> pipeline aggregation,
+except it works on the percentile sketches instead of the actual bucket values.
+
+==== Syntax
+
+A `moving_percentiles` aggregation looks like this in isolation:
+
+[source,js]
+--------------------------------------------------
+{
+    "moving_percentiles": {
+        "buckets_path": "the_percentile",
+        "window": 10
+    }
+}
+--------------------------------------------------
+// NOTCONSOLE
+
+[[moving-percentiles-params]]
+.`moving_percentiles` Parameters
+[options="header"]
+|===
+|Parameter Name |Description |Required |Default Value
+|`buckets_path` |Path to the percentile of interest (see <<buckets-path-syntax, `buckets_path` Syntax>> for more details) |Required |
+|`window` |The size of window to "slide" across the histogram. |Required |
+|`shift` |<<moving-percentiles-shift-parameter, Shift>> of window position. |Optional | 0
+|===
+
+`moving_percentiles` aggregations must be embedded inside of a `histogram` or `date_histogram` aggregation.  They can be
+embedded like any other metric aggregation:
+
+[source,console]
+--------------------------------------------------
+POST /_search
+{
+    "size": 0,
+    "aggs": {
+        "my_date_histo":{                <1>
+            "date_histogram":{
+                "field":"date",
+                "calendar_interval":"1M"
+            },
+            "aggs":{
+                "the_percentile":{        <2>
+                    "percentiles":{
+                      "field": "price",
+                      "percents": [ 1.0, 99.0 ]
+                     }
+                },
+                "the_movperc": {
+                    "moving_percentiles": {
+                        "buckets_path": "the_percentile", <3>
+                        "window": 10
+                    }
+                }
+            }
+        }
+    }
+}
+--------------------------------------------------
+// TEST[setup:sales]
+
+<1> A `date_histogram` named "my_date_histo" is constructed on the "date" field, with one-month intervals
+<2> A `percentiles` metric is used to calculate the percentiles of a field.
+<3> Finally, we specify a `moving_percentiles` aggregation which uses "the_percentile" sketch as its input.
+
+Moving percentiles are built by first specifying a `histogram` or `date_histogram` over a field.  You then add
+a percentile metric inside of that histogram.  Finally, the `moving_percentiles` is embedded inside the histogram.
+The `buckets_path` parameter is then used to "point" at the percentiles aggregation inside of the histogram (see
+<<buckets-path-syntax>> for a description of the syntax for `buckets_path`).
+
+And the following may be the response:
+
+[source,console-result]
+--------------------------------------------------
+{
+   "took": 11,
+   "timed_out": false,
+   "_shards": ...,
+   "hits": ...,
+   "aggregations": {
+      "my_date_histo": {
+         "buckets": [
+             {
+                 "key_as_string": "2015/01/01 00:00:00",
+                 "key": 1420070400000,
+                 "doc_count": 3,
+                 "the_percentile": {
+                     "values": {
+                       "1.0": 150.0,
+                       "99.0": 200.0
+                     }
+                 }
+             },
+             {
+                 "key_as_string": "2015/02/01 00:00:00",
+                 "key": 1422748800000,
+                 "doc_count": 2,
+                 "the_percentile": {
+                     "values": {
+                       "1.0": 10.0,
+                       "99.0": 50.0
+                     }
+                 },
+                 "the_movperc": {
+                   "values": {
+                     "1.0": 150.0,
+                     "99.0": 200.0
+                   }
+                 }
+             },
+             {
+                 "key_as_string": "2015/03/01 00:00:00",
+                 "key": 1425168000000,
+                 "doc_count": 2,
+                 "the_percentile": {
+                    "values": {
+                      "1.0": 175.0,
+                      "99.0": 200.0
+                    }
+                 },
+                 "the_movperc": {
+                    "values": {
+                      "1.0": 10.0,
+                      "99.0": 200.0
+                    }
+                 }
+             }
+         ]
+      }
+   }
+}
+--------------------------------------------------
+// TESTRESPONSE[s/"took": 11/"took": $body.took/]
+// TESTRESPONSE[s/"_shards": \.\.\./"_shards": $body._shards/]
+// TESTRESPONSE[s/"hits": \.\.\./"hits": $body.hits/]
+
+The output format of the `moving_percentiles` aggregation is inherited from the format of the referenced
+<<search-aggregations-metrics-percentile-aggregation,`percentiles`>> aggregation.
+
+Moving percentiles pipeline aggregations always run with `skip` gap policy.
+
+
+[[moving-percentiles-shift-parameter]]
+==== shift parameter
+
+By default (with `shift = 0`), the window that is offered for calculation is the last `n` values excluding the current bucket.
+Increasing `shift` by 1 moves the starting window position by `1` to the right.
+
+- To include the current bucket in the window, use `shift = 1`.
+- For center alignment (`n / 2` values before and after the current bucket), use `shift = window / 2`.
+- For right alignment (`n` values after the current bucket), use `shift = window`.
+
+If either of the window edges moves outside the borders of the data series, the window shrinks to include available values only.
diff --git a/docs/reference/rest-api/usage.asciidoc b/docs/reference/rest-api/usage.asciidoc
@@ -269,7 +269,8 @@ GET /_xpack/usage
       "top_metrics_usage" : 0,
       "cumulative_cardinality_usage" : 0,
       "t_test_usage" : 0,
-      "string_stats_usage" : 0
+      "string_stats_usage" : 0,
+      "moving_percentiles_usage" : 0
     }
   }
 }

diff --git a/...in/java/org/elasticsearch/search/aggregations/metrics/AbstractInternalHDRPercentiles.java b/...in/java/org/elasticsearch/search/aggregations/metrics/AbstractInternalHDRPercentiles.java
@@ -86,7 +86,7 @@ public double value(String name) {
         return value(Double.parseDouble(name));
     }
 
-    DocValueFormat formatter() {
+    public DocValueFormat formatter() {
         return format;
     }
 
@@ -96,10 +96,27 @@ public long getEstimatedMemoryFootprint() {
         return state.getEstimatedFootprintInBytes();
     }
 
-    DoubleHistogram getState() {
+    /**
+     * Return the internal {@link DoubleHistogram} sketch for this metric.
+     */
+    public DoubleHistogram getState() {
         return state;
     }
 
+    /**
+     * Return the keys (percentiles) requested.
+     */
+    public double[] getKeys() {
+        return keys;
+    }
+
+    /**
+     * Should the output be keyed.
+     */
+    public boolean keyed() {
+        return keyed;
+    }
+
     @Override
     public AbstractInternalHDRPercentiles reduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
         DoubleHistogram merged = null;

diff --git a/...ava/org/elasticsearch/search/aggregations/metrics/AbstractInternalTDigestPercentiles.java b/...ava/org/elasticsearch/search/aggregations/metrics/AbstractInternalTDigestPercentiles.java
@@ -72,18 +72,35 @@ public double value(String name) {
 
     public abstract double value(double key);
 
-    DocValueFormat formatter() {
+    public DocValueFormat formatter() {
         return format;
     }
 
     public long getEstimatedMemoryFootprint() {
         return state.byteSize();
     }
 
-    TDigestState getState() {
+    /**
+     * Return the internal {@link TDigestState} sketch for this metric.
+     */
+    public TDigestState getState() {
         return state;
     }
 
+    /**
+     * Return the keys (percentiles) requested.
+     */
+    public double[] getKeys() {
+        return keys;
+    }
+
+    /**
+     * Should the output be keyed.
+     */
+    public boolean keyed() {
+        return keyed;
+    }
+
     @Override
     public AbstractInternalTDigestPercentiles reduce(List<InternalAggregation> aggregations, ReduceContext reduceContext) {
         TDigestState merged = null;

diff --git a/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java b/x-pack/plugin/analytics/src/main/java/org/elasticsearch/xpack/analytics/AnalyticsPlugin.java
@@ -36,6 +36,7 @@
 import org.elasticsearch.xpack.analytics.boxplot.InternalBoxplot;
 import org.elasticsearch.xpack.analytics.cumulativecardinality.CumulativeCardinalityPipelineAggregationBuilder;
 import org.elasticsearch.xpack.analytics.mapper.HistogramFieldMapper;
+import org.elasticsearch.xpack.analytics.movingPercentiles.MovingPercentilesPipelineAggregationBuilder;
 import org.elasticsearch.xpack.analytics.stringstats.InternalStringStats;
 import org.elasticsearch.xpack.analytics.stringstats.StringStatsAggregationBuilder;
 import org.elasticsearch.xpack.analytics.topmetrics.InternalTopMetrics;
@@ -52,6 +53,7 @@
 import org.elasticsearch.xpack.core.action.XPackUsageFeatureAction;
 import org.elasticsearch.xpack.core.analytics.action.AnalyticsStatsAction;
 
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
 import java.util.Collections;
@@ -71,13 +73,18 @@ public AnalyticsPlugin() { }
 
     @Override
     public List<PipelineAggregationSpec> getPipelineAggregations() {
-        return singletonList(
-            new PipelineAggregationSpec(
-                CumulativeCardinalityPipelineAggregationBuilder.NAME,
-                CumulativeCardinalityPipelineAggregationBuilder::new,
-                usage.track(AnalyticsStatsAction.Item.CUMULATIVE_CARDINALITY,
-                        checkLicense(CumulativeCardinalityPipelineAggregationBuilder.PARSER)))
-        );
+        List<PipelineAggregationSpec> pipelineAggs = new ArrayList<>();
+        pipelineAggs.add(new PipelineAggregationSpec(
+            CumulativeCardinalityPipelineAggregationBuilder.NAME,
+            CumulativeCardinalityPipelineAggregationBuilder::new,
+            usage.track(AnalyticsStatsAction.Item.CUMULATIVE_CARDINALITY,
+                checkLicense(CumulativeCardinalityPipelineAggregationBuilder.PARSER))));
+        pipelineAggs.add(new PipelineAggregationSpec(
+            MovingPercentilesPipelineAggregationBuilder.NAME,
+            MovingPercentilesPipelineAggregationBuilder::new,
+            usage.track(AnalyticsStatsAction.Item.MOVING_PERCENTILES,
+                checkLicense(MovingPercentilesPipelineAggregationBuilder.PARSER))));
+        return pipelineAggs;
     }
 
     @Override