From 42e2308fce52912fe5334ee177e7deb48817d27d Mon Sep 17 00:00:00 2001 From: andrewjohnson2 Date: Wed, 10 Jun 2020 15:00:50 -0400 Subject: [PATCH] Added standard deviation / variance sampling to extended stats (#49782) Per 49554 I added standard deviation sampling and variance sampling to the extended stats interface. Closes #49554 Co-authored-by: Igor Motov --- .../extendedstats-aggregation.asciidoc | 24 ++- ...extended-stats-bucket-aggregation.asciidoc | 10 +- .../aggregations/metrics/ExtendedStatsIT.java | 166 ++++++++++++++- .../aggregations/metrics/ExtendedStats.java | 51 ++++- .../metrics/ExtendedStatsAggregator.java | 19 ++ .../metrics/InternalExtendedStats.java | 140 ++++++++++++- .../metrics/ParsedExtendedStats.java | 190 ++++++++++++++++-- .../metrics/ExtendedStatsAggregatorTests.java | 70 ++++++- .../metrics/InternalExtendedStatsTests.java | 26 ++- 9 files changed, 637 insertions(+), 59 deletions(-) diff --git a/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc b/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc index b420d27014e54..aa48ba08efbf9 100644 --- a/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc +++ b/docs/reference/aggregations/metrics/extendedstats-aggregation.asciidoc @@ -21,6 +21,7 @@ GET /exams/_search The above aggregation computes the grades statistics over all documents. The aggregation type is `extended_stats` and the `field` setting defines the numeric field of the documents the stats will be computed on. The above will return the following: +The `std_deviation` and `variance` are calculated as population metrics so they are always the same as `std_deviation_population` and `variance_population` respectively. [source,console-result] -------------------------------------------------- @@ -36,10 +37,18 @@ The above aggregation computes the grades statistics over all documents. 
The agg "sum": 150.0, "sum_of_squares": 12500.0, "variance": 625.0, + "variance_population": 625.0, + "variance_sampling": 1250.0, "std_deviation": 25.0, + "std_deviation_population": 25.0, + "std_deviation_sampling": 35.35533905932738, "std_deviation_bounds": { "upper": 125.0, - "lower": 25.0 + "lower": 25.0, + "upper_population" : 125.0, + "lower_population" : 25.0, + "upper_sampling" : 145.71067811865476, + "lower_sampling" : 4.289321881345245 } } } @@ -75,6 +84,9 @@ GET /exams/_search `sigma` can be any non-negative double, meaning you can request non-integer values such as `1.5`. A value of `0` is valid, but will simply return the average for both `upper` and `lower` bounds. +The `upper` and `lower` bounds are calculated as population metrics so they are always the same as `upper_population` and +`lower_population` respectively. + .Standard Deviation and Bounds require normality [NOTE] ===== @@ -93,9 +105,9 @@ GET /exams/_search { "size": 0, "aggs" : { - "grades_stats" : { - "extended_stats" : { - "script" : { + "grades_stats" : { + "extended_stats" : { + "script" : { "source" : "doc['grade'].value", "lang" : "painless" } @@ -114,8 +126,8 @@ GET /exams/_search { "size": 0, "aggs" : { - "grades_stats" : { - "extended_stats" : { + "grades_stats" : { + "extended_stats" : { "script" : { "id": "my_script", "params": { diff --git a/docs/reference/aggregations/pipeline/extended-stats-bucket-aggregation.asciidoc b/docs/reference/aggregations/pipeline/extended-stats-bucket-aggregation.asciidoc index 0ec03c99e4815..d9c9acf176103 100644 --- a/docs/reference/aggregations/pipeline/extended-stats-bucket-aggregation.asciidoc +++ b/docs/reference/aggregations/pipeline/extended-stats-bucket-aggregation.asciidoc @@ -113,10 +113,18 @@ And the following may be the response: "sum": 985.0, "sum_of_squares": 446725.0, "variance": 41105.55555555556, + "variance_population": 41105.55555555556, + "variance_sampling": 61658.33333333334, "std_deviation": 202.74505063146563, + 
"std_deviation_population": 202.74505063146563, + "std_deviation_sampling": 248.3109609609156, "std_deviation_bounds": { "upper": 733.8234345962646, - "lower": -77.15676792959795 + "lower": -77.15676792959795, + "upper_population" : 733.8234345962646, + "lower_population" : -77.15676792959795, + "upper_sampling" : 824.9552552551645, + "lower_sampling" : -168.28858858849787 } } } diff --git a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsIT.java b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsIT.java index ecbd52aaf4868..6164a318fd6ff 100644 --- a/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsIT.java +++ b/server/src/internalClusterTest/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsIT.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -66,7 +66,19 @@ private static double stdDev(int... vals) { return Math.sqrt(variance(vals)); } + private static double stdDevPopulation(int... vals) { + return Math.sqrt(variancePopulation(vals)); + } + + private static double stdDevSampling(int... vals) { + return Math.sqrt(varianceSampling(vals)); + } + private static double variance(int... vals) { + return variancePopulation(vals); + } + + private static double variancePopulation(int... vals) { double sum = 0; double sumOfSqrs = 0; for (int val : vals) { @@ -77,6 +89,17 @@ private static double variance(int... vals) { return variance < 0 ? 0 : variance; } + private static double varianceSampling(int... 
vals) { + double sum = 0; + double sumOfSqrs = 0; + for (int val : vals) { + sum += val; + sumOfSqrs += val * val; + } + double variance = (sumOfSqrs - ((sum * sum) / vals.length)) / (vals.length - 1); + return variance < 0 ? 0 : variance; + } + @Override public void testEmptyAggregation() throws Exception { SearchResponse searchResponse = client().prepareSearch("empty_bucket_idx") @@ -100,10 +123,15 @@ public void testEmptyAggregation() throws Exception { assertThat(stats.getMin(), equalTo(Double.POSITIVE_INFINITY)); assertThat(stats.getMax(), equalTo(Double.NEGATIVE_INFINITY)); assertThat(Double.isNaN(stats.getStdDeviation()), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationPopulation()), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationSampling()), is(true)); assertThat(Double.isNaN(stats.getAvg()), is(true)); - assertThat(Double.isNaN(stats.getStdDeviationBound(ExtendedStats.Bounds.UPPER)), is(true)); - assertThat(Double.isNaN(stats.getStdDeviationBound(ExtendedStats.Bounds.LOWER)), is(true)); - } + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.UPPER)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.LOWER)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.UPPER_POPULATION)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.LOWER_POPULATION)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.UPPER_SAMPLING)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.LOWER_SAMPLING)), is(true));} @Override public void testUnmapped() throws Exception { @@ -124,9 +152,17 @@ public void testUnmapped() throws Exception { assertThat(stats.getCount(), equalTo(0L)); assertThat(stats.getSumOfSquares(), equalTo(0.0)); assertThat(stats.getVariance(), equalTo(Double.NaN)); + assertThat(stats.getVariancePopulation(), equalTo(Double.NaN)); + assertThat(stats.getVarianceSampling(), equalTo(Double.NaN)); assertThat(stats.getStdDeviation(), 
equalTo(Double.NaN)); - assertThat(Double.isNaN(stats.getStdDeviationBound(ExtendedStats.Bounds.UPPER)), is(true)); - assertThat(Double.isNaN(stats.getStdDeviationBound(ExtendedStats.Bounds.LOWER)), is(true)); + assertThat(stats.getStdDeviationPopulation(), equalTo(Double.NaN)); + assertThat(stats.getStdDeviationSampling(), equalTo(Double.NaN)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.UPPER)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.LOWER)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.UPPER_POPULATION)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.LOWER_POPULATION)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.UPPER_SAMPLING)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.LOWER_SAMPLING)), is(true)); } public void testPartiallyUnmapped() { @@ -142,10 +178,15 @@ public void testPartiallyUnmapped() { assertEquals(s1.getMin(), s2.getMin(), 0d); assertEquals(s1.getMax(), s2.getMax(), 0d); assertEquals(s1.getStdDeviation(), s2.getStdDeviation(), 1e-10); + assertEquals(s1.getStdDeviationPopulation(), s2.getStdDeviationPopulation(), 1e-10); + assertEquals(s1.getStdDeviationSampling(), s2.getStdDeviationSampling(), 1e-10); assertEquals(s1.getSumOfSquares(), s2.getSumOfSquares(), 1e-10); assertEquals(s1.getStdDeviationBound(Bounds.LOWER), s2.getStdDeviationBound(Bounds.LOWER), 1e-10); assertEquals(s1.getStdDeviationBound(Bounds.UPPER), s2.getStdDeviationBound(Bounds.UPPER), 1e-10); - } + assertEquals(s1.getStdDeviationBound(Bounds.LOWER_POPULATION), s2.getStdDeviationBound(Bounds.LOWER_POPULATION), 1e-10); + assertEquals(s1.getStdDeviationBound(Bounds.UPPER_POPULATION), s2.getStdDeviationBound(Bounds.UPPER_POPULATION), 1e-10); + assertEquals(s1.getStdDeviationBound(Bounds.LOWER_SAMPLING), s2.getStdDeviationBound(Bounds.LOWER_SAMPLING), 1e-10); + assertEquals(s1.getStdDeviationBound(Bounds.UPPER_SAMPLING), 
s2.getStdDeviationBound(Bounds.UPPER_SAMPLING), 1e-10);} @Override public void testSingleValuedField() throws Exception { @@ -167,7 +208,11 @@ public void testSingleValuedField() throws Exception { assertThat(stats.getCount(), equalTo(10L)); assertThat(stats.getSumOfSquares(), equalTo((double) 1+4+9+16+25+36+49+64+81+100)); assertThat(stats.getVariance(), equalTo(variance(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getVariancePopulation(), equalTo(variancePopulation(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); assertThat(stats.getStdDeviation(), equalTo(stdDev(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getStdDeviationPopulation(), equalTo(stdDevPopulation(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); checkUpperLowerBounds(stats, sigma); } @@ -191,7 +236,11 @@ public void testSingleValuedFieldDefaultSigma() throws Exception { assertThat(stats.getCount(), equalTo(10L)); assertThat(stats.getSumOfSquares(), equalTo((double) 1+4+9+16+25+36+49+64+81+100)); assertThat(stats.getVariance(), equalTo(variance(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getVariancePopulation(), equalTo(variancePopulation(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); assertThat(stats.getStdDeviation(), equalTo(stdDev(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getStdDeviationPopulation(), equalTo(stdDevPopulation(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); checkUpperLowerBounds(stats, 2); } @@ -217,9 +266,17 @@ public void testSingleValuedField_WithFormatter() throws Exception { assertThat(stats.getSumOfSquares(), equalTo((double) 1 + 4 + 9 + 16 + 25 + 36 + 49 + 64 + 81 + 100)); 
assertThat(stats.getSumOfSquaresAsString(), equalTo("0385.0")); assertThat(stats.getVariance(), equalTo(variance(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getVariancePopulation(), equalTo(variancePopulation(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); assertThat(stats.getVarianceAsString(), equalTo("0008.2")); + assertThat(stats.getVariancePopulationAsString(), equalTo("0008.2")); + assertThat(stats.getVarianceSamplingAsString(), equalTo("0009.2")); assertThat(stats.getStdDeviation(), equalTo(stdDev(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getStdDeviationPopulation(), equalTo(stdDevPopulation(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); assertThat(stats.getStdDeviationAsString(), equalTo("0002.9")); + assertThat(stats.getStdDeviationPopulationAsString(), equalTo("0002.9")); + assertThat(stats.getStdDeviationSamplingAsString(), equalTo("0003.0")); checkUpperLowerBounds(stats, sigma); } @@ -264,9 +321,24 @@ public void testSingleValuedFieldGetProperty() throws Exception { double expectedVarianceValue = variance(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); assertThat(stats.getVariance(), equalTo(expectedVarianceValue)); assertThat((double) ((InternalAggregation)global).getProperty("stats.variance"), equalTo(expectedVarianceValue)); + double expectedVariancePopulationValue = variancePopulation(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + assertThat(stats.getVariancePopulation(), equalTo(expectedVariancePopulationValue)); + assertThat((double) ((InternalAggregation)global).getProperty("stats.variance_population"), + equalTo(expectedVariancePopulationValue)); + double expectedVarianceSamplingValue = varianceSampling(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + assertThat(stats.getVarianceSampling(), equalTo(expectedVarianceSamplingValue)); + assertThat((double) 
((InternalAggregation)global).getProperty("stats.variance_sampling"), equalTo(expectedVarianceSamplingValue)); double expectedStdDevValue = stdDev(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); assertThat(stats.getStdDeviation(), equalTo(expectedStdDevValue)); assertThat((double) ((InternalAggregation)global).getProperty("stats.std_deviation"), equalTo(expectedStdDevValue)); + double expectedStdDevPopulationValue = stdDevPopulation(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + assertThat(stats.getStdDeviationPopulation(), equalTo(expectedStdDevPopulationValue)); + assertThat((double) ((InternalAggregation)global).getProperty("stats.std_deviation_population"), + equalTo(expectedStdDevPopulationValue)); + double expectedStdDevSamplingValue = stdDevSampling(1, 2, 3, 4, 5, 6, 7, 8, 9, 10); + assertThat(stats.getStdDeviationSampling(), equalTo(expectedStdDevSamplingValue)); + assertThat((double) ((InternalAggregation)global).getProperty("stats.std_deviation_sampling"), + equalTo(expectedStdDevSamplingValue)); } @Override @@ -289,7 +361,11 @@ public void testSingleValuedFieldPartiallyUnmapped() throws Exception { assertThat(stats.getCount(), equalTo(10L)); assertThat(stats.getSumOfSquares(), equalTo((double) 1+4+9+16+25+36+49+64+81+100)); assertThat(stats.getVariance(), equalTo(variance(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getVariancePopulation(), equalTo(variancePopulation(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); assertThat(stats.getStdDeviation(), equalTo(stdDev(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getStdDeviationPopulation(), equalTo(stdDevPopulation(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(1, 2, 3, 4, 5, 6, 7, 8, 9, 10))); checkUpperLowerBounds(stats, sigma); } @@ -318,7 +394,11 @@ public void testSingleValuedFieldWithValueScript() throws Exception { assertThat(stats.getCount(), equalTo(10L)); 
assertThat(stats.getSumOfSquares(), equalTo((double) 4+9+16+25+36+49+64+81+100+121)); assertThat(stats.getVariance(), equalTo(variance(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); + assertThat(stats.getVariancePopulation(), equalTo(variancePopulation(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); assertThat(stats.getStdDeviation(), equalTo(stdDev(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); + assertThat(stats.getStdDeviationPopulation(), equalTo(stdDevPopulation(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); checkUpperLowerBounds(stats, sigma); } @@ -348,7 +428,11 @@ public void testSingleValuedFieldWithValueScriptWithParams() throws Exception { assertThat(stats.getCount(), equalTo(10L)); assertThat(stats.getSumOfSquares(), equalTo((double) 4+9+16+25+36+49+64+81+100+121)); assertThat(stats.getVariance(), equalTo(variance(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); + assertThat(stats.getVariancePopulation(), equalTo(variancePopulation(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); assertThat(stats.getStdDeviation(), equalTo(stdDev(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); + assertThat(stats.getStdDeviationPopulation(), equalTo(stdDevPopulation(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(2, 3, 4, 5, 6, 7, 8, 9, 10, 11))); checkUpperLowerBounds(stats, sigma); } @@ -372,7 +456,15 @@ public void testMultiValuedField() throws Exception { assertThat(stats.getCount(), equalTo(20L)); assertThat(stats.getSumOfSquares(), equalTo((double) 4+9+16+25+36+49+64+81+100+121+9+16+25+36+49+64+81+100+121+144)); assertThat(stats.getVariance(), equalTo(variance(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); + assertThat(stats.getVariancePopulation(), + 
equalTo(variancePopulation(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); + assertThat(stats.getVarianceSampling(), + equalTo(varianceSampling(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); assertThat(stats.getStdDeviation(), equalTo(stdDev(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); + assertThat(stats.getStdDeviationPopulation(), + equalTo(stdDevPopulation(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); + assertThat(stats.getStdDeviationSampling(), + equalTo(stdDevSampling(2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12))); checkUpperLowerBounds(stats, sigma); } @@ -401,7 +493,14 @@ public void testMultiValuedFieldWithValueScript() throws Exception { assertThat(stats.getCount(), equalTo(20L)); assertThat(stats.getSumOfSquares(), equalTo((double) 1+4+9+16+25+36+49+64+81+100+4+9+16+25+36+49+64+81+100+121)); assertThat(stats.getVariance(), equalTo(variance(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getVariancePopulation(), + equalTo(variancePopulation(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); assertThat(stats.getStdDeviation(), equalTo(stdDev(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getStdDeviationPopulation(), + equalTo(stdDevPopulation(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + checkUpperLowerBounds(stats, sigma); } @@ -431,7 +530,13 @@ public void testMultiValuedFieldWithValueScriptWithParams() throws Exception { assertThat(stats.getCount(), equalTo(20L)); assertThat(stats.getSumOfSquares(), equalTo((double) 1+4+9+16+25+36+49+64+81+100+4+9+16+25+36+49+64+81+100+121)); 
assertThat(stats.getVariance(), equalTo(variance(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getVariancePopulation(), + equalTo(variancePopulation(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); assertThat(stats.getStdDeviation(), equalTo(stdDev(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getStdDeviationPopulation(), + equalTo(stdDevPopulation(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); checkUpperLowerBounds(stats, sigma); } @@ -459,7 +564,11 @@ public void testScriptSingleValued() throws Exception { assertThat(stats.getCount(), equalTo(10L)); assertThat(stats.getSumOfSquares(), equalTo((double) 1+4+9+16+25+36+49+64+81+100)); assertThat(stats.getVariance(), equalTo(variance(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10))); + assertThat(stats.getVariancePopulation(), equalTo(variancePopulation(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10))); assertThat(stats.getStdDeviation(), equalTo(stdDev(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10))); + assertThat(stats.getStdDeviationPopulation(), equalTo(stdDevPopulation(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10))); checkUpperLowerBounds(stats, sigma); } @@ -491,7 +600,11 @@ public void testScriptSingleValuedWithParams() throws Exception { assertThat(stats.getCount(), equalTo(10L)); assertThat(stats.getSumOfSquares(), equalTo((double) 4+9+16+25+36+49+64+81+100+121)); assertThat(stats.getVariance(), equalTo(variance(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getVariancePopulation(), 
equalTo(variancePopulation(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); assertThat(stats.getStdDeviation(), equalTo(stdDev(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getStdDeviationPopulation(), equalTo(stdDevPopulation(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11))); checkUpperLowerBounds(stats, sigma); } @@ -519,7 +632,15 @@ public void testScriptMultiValued() throws Exception { assertThat(stats.getCount(), equalTo(20L)); assertThat(stats.getSumOfSquares(), equalTo((double) 4+9+16+25+36+49+64+81+100+121+9+16+25+36+49+64+81+100+121+144)); assertThat(stats.getVariance(), equalTo(variance(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 12))); + assertThat(stats.getVariancePopulation(), + equalTo(variancePopulation(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 12))); + assertThat(stats.getVarianceSampling(), + equalTo(varianceSampling(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 12))); assertThat(stats.getStdDeviation(), equalTo(stdDev(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 12))); + assertThat(stats.getStdDeviationPopulation(), + equalTo(stdDevPopulation(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 12))); + assertThat(stats.getStdDeviationSampling(), + equalTo(stdDevSampling(2, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 3, 4, 5, 6, 7, 8 ,9, 10, 11, 12))); checkUpperLowerBounds(stats, sigma); } @@ -552,7 +673,13 @@ public void testScriptMultiValuedWithParams() throws Exception { assertThat(stats.getCount(), equalTo(20L)); assertThat(stats.getSumOfSquares(), equalTo((double) 1+4+9+16+25+36+49+64+81+100+0+1+4+9+16+25+36+49+64+81)); assertThat(stats.getVariance(), equalTo(variance(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8 ,9))); + assertThat(stats.getVariancePopulation(), + 
equalTo(variancePopulation(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8 ,9))); + assertThat(stats.getVarianceSampling(), equalTo(varianceSampling(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8 ,9))); assertThat(stats.getStdDeviation(), equalTo(stdDev(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8 ,9))); + assertThat(stats.getStdDeviationPopulation(), + equalTo(stdDevPopulation(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8 ,9))); + assertThat(stats.getStdDeviationSampling(), equalTo(stdDevSampling(1, 2, 3, 4, 5, 6, 7, 8 ,9, 10, 0, 1, 2, 3, 4, 5, 6, 7, 8 ,9))); checkUpperLowerBounds(stats, sigma); } @@ -586,9 +713,15 @@ public void testEmptySubAggregation() { assertThat(stats.getMin(), equalTo(Double.POSITIVE_INFINITY)); assertThat(stats.getMax(), equalTo(Double.NEGATIVE_INFINITY)); assertThat(Double.isNaN(stats.getStdDeviation()), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationPopulation()), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationSampling()), is(true)); assertThat(Double.isNaN(stats.getAvg()), is(true)); - assertThat(Double.isNaN(stats.getStdDeviationBound(ExtendedStats.Bounds.UPPER)), is(true)); - assertThat(Double.isNaN(stats.getStdDeviationBound(ExtendedStats.Bounds.LOWER)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.UPPER)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.LOWER)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.UPPER_POPULATION)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.LOWER_POPULATION)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.UPPER_SAMPLING)), is(true)); + assertThat(Double.isNaN(stats.getStdDeviationBound(Bounds.LOWER_SAMPLING)), is(true)); } } @@ -625,17 +758,30 @@ public void testOrderByEmptyAggregation() throws Exception { assertThat(extendedStats.getSum(), equalTo(0.0)); assertThat(extendedStats.getCount(), equalTo(0L)); 
assertThat(extendedStats.getStdDeviation(), equalTo(Double.NaN)); + assertThat(extendedStats.getStdDeviationPopulation(), equalTo(Double.NaN)); + assertThat(extendedStats.getStdDeviationSampling(), equalTo(Double.NaN)); assertThat(extendedStats.getSumOfSquares(), equalTo(0.0)); assertThat(extendedStats.getVariance(), equalTo(Double.NaN)); + assertThat(extendedStats.getVariancePopulation(), equalTo(Double.NaN)); + assertThat(extendedStats.getVarianceSampling(), equalTo(Double.NaN)); assertThat(extendedStats.getStdDeviationBound(Bounds.LOWER), equalTo(Double.NaN)); assertThat(extendedStats.getStdDeviationBound(Bounds.UPPER), equalTo(Double.NaN)); - + assertThat(extendedStats.getStdDeviationBound(Bounds.LOWER_POPULATION), equalTo(Double.NaN)); + assertThat(extendedStats.getStdDeviationBound(Bounds.UPPER_POPULATION), equalTo(Double.NaN)); + assertThat(extendedStats.getStdDeviationBound(Bounds.LOWER_SAMPLING), equalTo(Double.NaN)); + assertThat(extendedStats.getStdDeviationBound(Bounds.UPPER_SAMPLING), equalTo(Double.NaN)); } } private void checkUpperLowerBounds(ExtendedStats stats, double sigma) { assertThat(stats.getStdDeviationBound(ExtendedStats.Bounds.UPPER), equalTo(stats.getAvg() + (stats.getStdDeviation() * sigma))); assertThat(stats.getStdDeviationBound(ExtendedStats.Bounds.LOWER), equalTo(stats.getAvg() - (stats.getStdDeviation() * sigma))); + assertThat(stats.getStdDeviationBound(Bounds.UPPER_POPULATION), equalTo(stats.getAvg() + + (stats.getStdDeviationPopulation() * sigma))); + assertThat(stats.getStdDeviationBound(Bounds.LOWER_POPULATION), equalTo(stats.getAvg() - + (stats.getStdDeviationPopulation() * sigma))); + assertThat(stats.getStdDeviationBound(Bounds.UPPER_SAMPLING), equalTo(stats.getAvg() + (stats.getStdDeviationSampling() * sigma))); + assertThat(stats.getStdDeviationBound(Bounds.LOWER_SAMPLING), equalTo(stats.getAvg() - (stats.getStdDeviationSampling() * sigma))); } /** diff --git 
a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ExtendedStats.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ExtendedStats.java index 68dac3e373d1c..39950ca03a441 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ExtendedStats.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ExtendedStats.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -29,25 +29,55 @@ public interface ExtendedStats extends Stats { double getSumOfSquares(); /** - * The variance of the collected values. + * The population variance of the collected values. */ double getVariance(); /** - * The standard deviation of the collected values. + * The population variance of the collected values. + */ + double getVariancePopulation(); + + /** + * The sampling variance of the collected values. + */ + double getVarianceSampling(); + + /** + * The population standard deviation of the collected values. */ double getStdDeviation(); + /** + * The population standard deviation of the collected values. + */ + double getStdDeviationPopulation(); + + /** + * The sampling standard deviation of the collected values. + */ + double getStdDeviationSampling(); + /** * The upper or lower bounds of the stdDeviation */ double getStdDeviationBound(Bounds bound); /** - * The standard deviation of the collected values as a String. + * The population standard deviation of the collected values as a String. */ String getStdDeviationAsString(); + /** + * The population standard deviation of the collected values as a String. 
+ */ + String getStdDeviationPopulationAsString(); + + /** + * The sampling standard deviation of the collected values as a String. + */ + String getStdDeviationSamplingAsString(); + /** * The upper or lower bounds of stdDev of the collected values as a String. */ @@ -60,13 +90,22 @@ public interface ExtendedStats extends Stats { String getSumOfSquaresAsString(); /** - * The variance of the collected values as a String. + * The population variance of the collected values as a String. */ String getVarianceAsString(); + /** + * The population variance of the collected values as a String. + */ + String getVariancePopulationAsString(); + + /** + * The sampling variance of the collected values as a String. + */ + String getVarianceSamplingAsString(); enum Bounds { - UPPER, LOWER + UPPER, LOWER, UPPER_POPULATION, LOWER_POPULATION, UPPER_SAMPLING, LOWER_SAMPLING } } diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsAggregator.java index c5b8cdaae9edc..7e8258986a335 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsAggregator.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsAggregator.java @@ -162,7 +162,11 @@ public double metric(String name, long owningBucketOrd) { case avg: return Double.NaN; case sum_of_squares: return 0; case variance: return Double.NaN; + case variance_population: return Double.NaN; + case variance_sampling: return Double.NaN; case std_deviation: return Double.NaN; + case std_deviation_population: return Double.NaN; + case std_deviation_sampling: return Double.NaN; case std_upper: return Double.NaN; case std_lower: return Double.NaN; default: @@ -177,7 +181,11 @@ public double metric(String name, long owningBucketOrd) { case avg: return sums.get(owningBucketOrd) / counts.get(owningBucketOrd); case sum_of_squares: return 
sumOfSqrs.get(owningBucketOrd); case variance: return variance(owningBucketOrd); + case variance_population: return variancePopulation(owningBucketOrd); + case variance_sampling: return varianceSampling(owningBucketOrd); case std_deviation: return Math.sqrt(variance(owningBucketOrd)); + case std_deviation_population: return Math.sqrt(variancePopulation(owningBucketOrd)); + case std_deviation_sampling: return Math.sqrt(varianceSampling(owningBucketOrd)); case std_upper: return (sums.get(owningBucketOrd) / counts.get(owningBucketOrd)) + (Math.sqrt(variance(owningBucketOrd)) * this.sigma); case std_lower: @@ -188,12 +196,23 @@ public double metric(String name, long owningBucketOrd) { } private double variance(long owningBucketOrd) { + return variancePopulation(owningBucketOrd); + } + + private double variancePopulation(long owningBucketOrd) { double sum = sums.get(owningBucketOrd); long count = counts.get(owningBucketOrd); double variance = (sumOfSqrs.get(owningBucketOrd) - ((sum * sum) / count)) / count; return variance < 0 ? 0 : variance; } + private double varianceSampling(long owningBucketOrd) { + double sum = sums.get(owningBucketOrd); + long count = counts.get(owningBucketOrd); + double variance = (sumOfSqrs.get(owningBucketOrd) - ((sum * sum) / count)) / (count - 1); + return variance < 0 ? 0 : variance; + } + @Override public InternalAggregation buildAggregation(long bucket) { if (valuesSource == null || bucket >= counts.size()) { diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/InternalExtendedStats.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/InternalExtendedStats.java index 5e09a1c37660d..a438b9193122d 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/InternalExtendedStats.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/InternalExtendedStats.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -32,7 +32,9 @@ public class InternalExtendedStats extends InternalStats implements ExtendedStats { enum Metrics { - count, sum, min, max, avg, sum_of_squares, variance, std_deviation, std_upper, std_lower; + count, sum, min, max, avg, sum_of_squares, variance, variance_population, variance_sampling, + std_deviation, std_deviation_population, std_deviation_sampling, std_upper, std_lower, std_upper_population, std_lower_population, + std_upper_sampling, std_lower_sampling; public static Metrics resolve(String name) { return Metrics.valueOf(name); @@ -77,15 +79,39 @@ public double value(String name) { if ("variance".equals(name)) { return getVariance(); } + if ("variance_population".equals(name)) { + return getVariancePopulation(); + } + if ("variance_sampling".equals(name)) { + return getVarianceSampling(); + } if ("std_deviation".equals(name)) { return getStdDeviation(); } + if ("std_deviation_population".equals(name)) { + return getStdDeviationPopulation(); + } + if ("std_deviation_sampling".equals(name)) { + return getStdDeviationSampling(); + } if ("std_upper".equals(name)) { return getStdDeviationBound(Bounds.UPPER); } if ("std_lower".equals(name)) { return getStdDeviationBound(Bounds.LOWER); } + if ("std_upper_population".equals(name)) { + return getStdDeviationBound(Bounds.UPPER_POPULATION); + } + if ("std_lower_population".equals(name)) { + return getStdDeviationBound(Bounds.LOWER_POPULATION); + } + if ("std_upper_sampling".equals(name)) { + return getStdDeviationBound(Bounds.UPPER_SAMPLING); + } + if ("std_lower_sampling".equals(name)) { + return getStdDeviationBound(Bounds.LOWER_SAMPLING); + } return super.value(name); } @@ -100,21 +126,51 @@ public double getSumOfSquares() { @Override public 
double getVariance() { + return getVariancePopulation(); + } + + @Override + public double getVariancePopulation() { double variance = (sumOfSqrs - ((sum * sum) / count)) / count; return variance < 0 ? 0 : variance; } + @Override + public double getVarianceSampling() { + double variance = (sumOfSqrs - ((sum * sum) / count)) / (count - 1); + return variance < 0 ? 0 : variance; + } + @Override public double getStdDeviation() { - return Math.sqrt(getVariance()); + return getStdDeviationPopulation(); + } + + @Override + public double getStdDeviationPopulation() { + return Math.sqrt(getVariancePopulation()); + } + + @Override + public double getStdDeviationSampling() { + return Math.sqrt(getVarianceSampling()); } @Override public double getStdDeviationBound(Bounds bound) { - if (bound.equals(Bounds.UPPER)) { - return getAvg() + (getStdDeviation() * sigma); - } else { - return getAvg() - (getStdDeviation() * sigma); + switch (bound) { + case UPPER: + case UPPER_POPULATION: + return getAvg() + (getStdDeviationPopulation() * sigma); + case UPPER_SAMPLING: + return getAvg() + (getStdDeviationSampling() * sigma); + case LOWER: + case LOWER_POPULATION: + return getAvg() - (getStdDeviationPopulation() * sigma); + case LOWER_SAMPLING: + return getAvg() - (getStdDeviationSampling() * sigma); + default: + throw new IllegalArgumentException("Unknown bounds type " + bound); } } @@ -128,14 +184,49 @@ public String getVarianceAsString() { return valueAsString(Metrics.variance.name()); } + @Override + public String getVariancePopulationAsString() { + return valueAsString(Metrics.variance_population.name()); + } + + @Override + public String getVarianceSamplingAsString() { + return valueAsString(Metrics.variance_sampling.name()); + } + @Override public String getStdDeviationAsString() { return valueAsString(Metrics.std_deviation.name()); } + @Override + public String getStdDeviationPopulationAsString() { + return valueAsString(Metrics.std_deviation_population.name()); + } + + @Override 
+ public String getStdDeviationSamplingAsString() { + return valueAsString(Metrics.std_deviation_sampling.name()); + } + @Override public String getStdDeviationBoundAsString(Bounds bound) { - return bound == Bounds.UPPER ? valueAsString(Metrics.std_upper.name()) : valueAsString(Metrics.std_lower.name()); + switch (bound) { + case UPPER: + return valueAsString(Metrics.std_upper.name()); + case LOWER: + return valueAsString(Metrics.std_lower.name()); + case UPPER_POPULATION: + return valueAsString(Metrics.std_upper_population.name()); + case LOWER_POPULATION: + return valueAsString(Metrics.std_lower_population.name()); + case UPPER_SAMPLING: + return valueAsString(Metrics.std_upper_sampling.name()); + case LOWER_SAMPLING: + return valueAsString(Metrics.std_lower_sampling.name()); + default: + throw new IllegalArgumentException("Unknown bounds type " + bound); + } } @Override @@ -167,13 +258,24 @@ static class Fields { public static final String SUM_OF_SQRS_AS_STRING = "sum_of_squares_as_string"; public static final String VARIANCE = "variance"; public static final String VARIANCE_AS_STRING = "variance_as_string"; + public static final String VARIANCE_POPULATION = "variance_population"; + public static final String VARIANCE_POPULATION_AS_STRING = "variance_population_as_string"; + public static final String VARIANCE_SAMPLING = "variance_sampling"; + public static final String VARIANCE_SAMPLING_AS_STRING = "variance_sampling_as_string"; public static final String STD_DEVIATION = "std_deviation"; public static final String STD_DEVIATION_AS_STRING = "std_deviation_as_string"; + public static final String STD_DEVIATION_POPULATION = "std_deviation_population"; + public static final String STD_DEVIATION_POPULATION_AS_STRING = "std_deviation_population_as_string"; + public static final String STD_DEVIATION_SAMPLING = "std_deviation_sampling"; + public static final String STD_DEVIATION_SAMPLING_AS_STRING = "std_deviation_sampling_as_string"; public static final String 
STD_DEVIATION_BOUNDS = "std_deviation_bounds"; public static final String STD_DEVIATION_BOUNDS_AS_STRING = "std_deviation_bounds_as_string"; public static final String UPPER = "upper"; public static final String LOWER = "lower"; - + public static final String UPPER_POPULATION = "upper_population"; + public static final String LOWER_POPULATION = "lower_population"; + public static final String UPPER_SAMPLING = "upper_sampling"; + public static final String LOWER_SAMPLING = "lower_sampling"; } @Override @@ -181,28 +283,48 @@ protected XContentBuilder otherStatsToXContent(XContentBuilder builder, Params p if (count != 0) { builder.field(Fields.SUM_OF_SQRS, sumOfSqrs); builder.field(Fields.VARIANCE, getVariance()); + builder.field(Fields.VARIANCE_POPULATION, getVariancePopulation()); + builder.field(Fields.VARIANCE_SAMPLING, getVarianceSampling()); builder.field(Fields.STD_DEVIATION, getStdDeviation()); + builder.field(Fields.STD_DEVIATION_POPULATION, getStdDeviationPopulation()); + builder.field(Fields.STD_DEVIATION_SAMPLING, getStdDeviationSampling()); builder.startObject(Fields.STD_DEVIATION_BOUNDS); { builder.field(Fields.UPPER, getStdDeviationBound(Bounds.UPPER)); builder.field(Fields.LOWER, getStdDeviationBound(Bounds.LOWER)); + builder.field(Fields.UPPER_POPULATION, getStdDeviationBound(Bounds.UPPER_POPULATION)); + builder.field(Fields.LOWER_POPULATION, getStdDeviationBound(Bounds.LOWER_POPULATION)); + builder.field(Fields.UPPER_SAMPLING, getStdDeviationBound(Bounds.UPPER_SAMPLING)); + builder.field(Fields.LOWER_SAMPLING, getStdDeviationBound(Bounds.LOWER_SAMPLING)); } builder.endObject(); if (format != DocValueFormat.RAW) { builder.field(Fields.SUM_OF_SQRS_AS_STRING, format.format(sumOfSqrs)); builder.field(Fields.VARIANCE_AS_STRING, format.format(getVariance())); + builder.field(Fields.VARIANCE_POPULATION_AS_STRING, format.format(getVariancePopulation())); + builder.field(Fields.VARIANCE_SAMPLING_AS_STRING, format.format(getVarianceSampling())); 
builder.field(Fields.STD_DEVIATION_AS_STRING, getStdDeviationAsString()); + builder.field(Fields.STD_DEVIATION_POPULATION_AS_STRING, getStdDeviationPopulationAsString()); + builder.field(Fields.STD_DEVIATION_SAMPLING_AS_STRING, getStdDeviationSamplingAsString()); builder.startObject(Fields.STD_DEVIATION_BOUNDS_AS_STRING); { builder.field(Fields.UPPER, getStdDeviationBoundAsString(Bounds.UPPER)); builder.field(Fields.LOWER, getStdDeviationBoundAsString(Bounds.LOWER)); + builder.field(Fields.UPPER_POPULATION, getStdDeviationBoundAsString(Bounds.UPPER_POPULATION)); + builder.field(Fields.LOWER_POPULATION, getStdDeviationBoundAsString(Bounds.LOWER_POPULATION)); + builder.field(Fields.UPPER_SAMPLING, getStdDeviationBoundAsString(Bounds.UPPER_SAMPLING)); + builder.field(Fields.LOWER_SAMPLING, getStdDeviationBoundAsString(Bounds.LOWER_SAMPLING)); } builder.endObject(); } } else { builder.nullField(Fields.SUM_OF_SQRS); builder.nullField(Fields.VARIANCE); + builder.nullField(Fields.VARIANCE_POPULATION); + builder.nullField(Fields.VARIANCE_SAMPLING); builder.nullField(Fields.STD_DEVIATION); + builder.nullField(Fields.STD_DEVIATION_POPULATION); + builder.nullField(Fields.STD_DEVIATION_SAMPLING); builder.startObject(Fields.STD_DEVIATION_BOUNDS); { builder.nullField(Fields.UPPER); diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ParsedExtendedStats.java b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ParsedExtendedStats.java index cee96c07a2471..7e3cb00f5ff92 100644 --- a/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ParsedExtendedStats.java +++ b/server/src/main/java/org/elasticsearch/search/aggregations/metrics/ParsedExtendedStats.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. 
* You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -20,7 +20,6 @@ package org.elasticsearch.search.aggregations.metrics; import org.elasticsearch.common.ParseField; -import org.elasticsearch.common.collect.Tuple; import org.elasticsearch.common.xcontent.ConstructingObjectParser; import org.elasticsearch.common.xcontent.ObjectParser; import org.elasticsearch.common.xcontent.ObjectParser.ValueType; @@ -29,16 +28,29 @@ import org.elasticsearch.search.aggregations.metrics.InternalExtendedStats.Fields; import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import java.util.stream.Collectors; import static org.elasticsearch.common.xcontent.ConstructingObjectParser.constructorArg; +import static org.elasticsearch.common.xcontent.ConstructingObjectParser.optionalConstructorArg; public class ParsedExtendedStats extends ParsedStats implements ExtendedStats { protected double sumOfSquares; protected double variance; + protected double variancePopulation; + protected double varianceSampling; protected double stdDeviation; + protected double stdDeviationPopulation; + protected double stdDeviationSampling; protected double stdDeviationBoundUpper; protected double stdDeviationBoundLower; + protected double stdDeviationBoundUpperPopulation; + protected double stdDeviationBoundLowerPopulation; + protected double stdDeviationBoundUpperSampling; + protected double stdDeviationBoundLowerSampling; + protected double sum; protected double avg; @@ -57,19 +69,70 @@ public double getVariance() { return variance; } + @Override + public double getVariancePopulation() { + return variancePopulation; + } + + @Override + public double getVarianceSampling() { + return varianceSampling; + } + @Override public double getStdDeviation() { return stdDeviation; } - 
private void setStdDeviationBounds(Tuple bounds) { - this.stdDeviationBoundLower = bounds.v1(); - this.stdDeviationBoundUpper = bounds.v2(); + @Override + public double getStdDeviationPopulation() { + return stdDeviationPopulation; + } + + @Override + public double getStdDeviationSampling() { + return stdDeviationSampling; + } + + private void setStdDeviationBounds(List bounds) { + int i = 0; + this.stdDeviationBoundUpper = bounds.get(i++); + this.stdDeviationBoundLower = bounds.get(i++); + this.stdDeviationBoundUpperPopulation = bounds.get(i++); + this.stdDeviationBoundLowerPopulation = bounds.get(i++); + this.stdDeviationBoundUpperSampling = bounds.get(i++); + this.stdDeviationBoundLowerSampling = bounds.get(i); + } @Override public double getStdDeviationBound(Bounds bound) { - return (bound.equals(Bounds.LOWER)) ? stdDeviationBoundLower : stdDeviationBoundUpper; + switch (bound) { + case UPPER: + return stdDeviationBoundUpper; + case UPPER_POPULATION: + return stdDeviationBoundUpperPopulation; + case UPPER_SAMPLING: + return stdDeviationBoundUpperSampling; + case LOWER: + return stdDeviationBoundLower; + case LOWER_POPULATION: + return stdDeviationBoundLowerPopulation; + case LOWER_SAMPLING: + return stdDeviationBoundLowerSampling; + default: + throw new IllegalArgumentException("Unknown bounds type " + bound); + } + } + + private void setStdDeviationBoundsAsString(List boundsAsString) { + int i = 0; + this.valueAsString.put(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_upper", boundsAsString.get(i++)); + this.valueAsString.put(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_lower", boundsAsString.get(i++)); + this.valueAsString.put(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_upper_population", boundsAsString.get(i++)); + this.valueAsString.put(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_lower_population", boundsAsString.get(i++)); + this.valueAsString.put(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_upper_sampling", boundsAsString.get(i++)); + 
this.valueAsString.put(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_lower_sampling", boundsAsString.get(i)); } @Override @@ -77,17 +140,39 @@ public String getStdDeviationAsString() { return valueAsString.getOrDefault(Fields.STD_DEVIATION_AS_STRING, Double.toString(stdDeviation)); } - private void setStdDeviationBoundsAsString(Tuple boundsAsString) { - this.valueAsString.put(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_lower", boundsAsString.v1()); - this.valueAsString.put(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_upper", boundsAsString.v2()); + @Override + public String getStdDeviationPopulationAsString() { + return valueAsString.getOrDefault(Fields.STD_DEVIATION_POPULATION_AS_STRING, Double.toString(stdDeviationPopulation)); + } + + @Override + public String getStdDeviationSamplingAsString() { + return valueAsString.getOrDefault(Fields.STD_DEVIATION_SAMPLING_AS_STRING, Double.toString(stdDeviationSampling)); } @Override public String getStdDeviationBoundAsString(Bounds bound) { - if (bound.equals(Bounds.LOWER)) { - return valueAsString.getOrDefault(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_lower", Double.toString(stdDeviationBoundLower)); - } else { - return valueAsString.getOrDefault(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_upper", Double.toString(stdDeviationBoundUpper)); + switch (bound) { + case UPPER: + return valueAsString.getOrDefault(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_upper", + Double.toString(stdDeviationBoundUpper)); + case UPPER_POPULATION: + return valueAsString.getOrDefault(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_upper_population", + Double.toString(stdDeviationBoundUpperPopulation)); + case UPPER_SAMPLING: + return valueAsString.getOrDefault(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_upper_sampling", + Double.toString(stdDeviationBoundUpperSampling)); + case LOWER: + return valueAsString.getOrDefault(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_lower", + Double.toString(stdDeviationBoundLower)); + case LOWER_POPULATION: + return 
valueAsString.getOrDefault(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_lower_population", + Double.toString(stdDeviationBoundLowerPopulation)); + case LOWER_SAMPLING: + return valueAsString.getOrDefault(Fields.STD_DEVIATION_BOUNDS_AS_STRING + "_lower_sampling", + Double.toString(stdDeviationBoundLowerSampling)); + default: + throw new IllegalArgumentException("Unknown bounds type " + bound); } } @@ -101,37 +186,71 @@ public String getVarianceAsString() { return valueAsString.getOrDefault(Fields.VARIANCE_AS_STRING, Double.toString(variance)); } + @Override + public String getVariancePopulationAsString() { + return valueAsString.getOrDefault(Fields.VARIANCE_POPULATION_AS_STRING, Double.toString(variancePopulation)); + } + + @Override + public String getVarianceSamplingAsString() { + return valueAsString.getOrDefault(Fields.VARIANCE_SAMPLING_AS_STRING, Double.toString(varianceSampling)); + } + @Override protected XContentBuilder otherStatsToXContent(XContentBuilder builder, Params params) throws IOException { if (count != 0) { builder.field(Fields.SUM_OF_SQRS, sumOfSquares); builder.field(Fields.VARIANCE, getVariance()); + builder.field(Fields.VARIANCE_POPULATION, getVariancePopulation()); + builder.field(Fields.VARIANCE_SAMPLING, getVarianceSampling()); builder.field(Fields.STD_DEVIATION, getStdDeviation()); + builder.field(Fields.STD_DEVIATION_POPULATION, getStdDeviationPopulation()); + builder.field(Fields.STD_DEVIATION_SAMPLING, getStdDeviationSampling()); builder.startObject(Fields.STD_DEVIATION_BOUNDS); { builder.field(Fields.UPPER, getStdDeviationBound(Bounds.UPPER)); builder.field(Fields.LOWER, getStdDeviationBound(Bounds.LOWER)); + builder.field(Fields.UPPER_POPULATION, getStdDeviationBound(Bounds.UPPER_POPULATION)); + builder.field(Fields.LOWER_POPULATION, getStdDeviationBound(Bounds.LOWER_POPULATION)); + builder.field(Fields.UPPER_SAMPLING, getStdDeviationBound(Bounds.UPPER_SAMPLING)); + builder.field(Fields.LOWER_SAMPLING, 
getStdDeviationBound(Bounds.LOWER_SAMPLING)); } builder.endObject(); if (valueAsString.containsKey(Fields.SUM_OF_SQRS_AS_STRING)) { builder.field(Fields.SUM_OF_SQRS_AS_STRING, getSumOfSquaresAsString()); builder.field(Fields.VARIANCE_AS_STRING, getVarianceAsString()); + builder.field(Fields.VARIANCE_POPULATION_AS_STRING, getVariancePopulationAsString()); + builder.field(Fields.VARIANCE_SAMPLING_AS_STRING, getVarianceSamplingAsString()); builder.field(Fields.STD_DEVIATION_AS_STRING, getStdDeviationAsString()); + builder.field(Fields.STD_DEVIATION_POPULATION_AS_STRING, getStdDeviationPopulationAsString()); + builder.field(Fields.STD_DEVIATION_SAMPLING_AS_STRING, getStdDeviationSamplingAsString()); builder.startObject(Fields.STD_DEVIATION_BOUNDS_AS_STRING); { builder.field(Fields.UPPER, getStdDeviationBoundAsString(Bounds.UPPER)); builder.field(Fields.LOWER, getStdDeviationBoundAsString(Bounds.LOWER)); + builder.field(Fields.UPPER_POPULATION, getStdDeviationBoundAsString(Bounds.UPPER_POPULATION)); + builder.field(Fields.LOWER_POPULATION, getStdDeviationBoundAsString(Bounds.LOWER_POPULATION)); + builder.field(Fields.UPPER_SAMPLING, getStdDeviationBoundAsString(Bounds.UPPER_SAMPLING)); + builder.field(Fields.LOWER_SAMPLING, getStdDeviationBoundAsString(Bounds.LOWER_SAMPLING)); } builder.endObject(); } } else { builder.nullField(Fields.SUM_OF_SQRS); builder.nullField(Fields.VARIANCE); + builder.nullField(Fields.VARIANCE_POPULATION); + builder.nullField(Fields.VARIANCE_SAMPLING); builder.nullField(Fields.STD_DEVIATION); + builder.nullField(Fields.STD_DEVIATION_POPULATION); + builder.nullField(Fields.STD_DEVIATION_SAMPLING); builder.startObject(Fields.STD_DEVIATION_BOUNDS); { builder.nullField(Fields.UPPER); builder.nullField(Fields.LOWER); + builder.nullField(Fields.UPPER_POPULATION); + builder.nullField(Fields.LOWER_POPULATION); + builder.nullField(Fields.UPPER_SAMPLING); + builder.nullField(Fields.LOWER_SAMPLING); } builder.endObject(); } @@ -141,20 +260,33 @@ protected 
XContentBuilder otherStatsToXContent(XContentBuilder builder, Params p private static final ObjectParser PARSER = new ObjectParser<>(ParsedExtendedStats.class.getSimpleName(), true, ParsedExtendedStats::new); - private static final ConstructingObjectParser, Void> STD_BOUNDS_PARSER = new ConstructingObjectParser<>( - ParsedExtendedStats.class.getSimpleName() + "_STD_BOUNDS", true, args -> new Tuple<>((Double) args[0], (Double) args[1])); + private static final ConstructingObjectParser, Void> STD_BOUNDS_PARSER = new ConstructingObjectParser<>( + ParsedExtendedStats.class.getSimpleName() + "_STD_BOUNDS", true, args -> Arrays.stream(args).map(d -> (Double) d).collect( + Collectors.toList())); - private static final ConstructingObjectParser, Void> STD_BOUNDS_AS_STRING_PARSER = new ConstructingObjectParser<>( - ParsedExtendedStats.class.getSimpleName() + "_STD_BOUNDS_AS_STRING", true, - args -> new Tuple<>((String) args[0], (String) args[1])); + private static final ConstructingObjectParser, Void> STD_BOUNDS_AS_STRING_PARSER = new ConstructingObjectParser<>( + ParsedExtendedStats.class.getSimpleName() + "_STD_BOUNDS_AS_STRING", true, args -> Arrays.stream(args).map(d -> (String) d).collect( + Collectors.toList())); static { STD_BOUNDS_PARSER.declareField(constructorArg(), (parser, context) -> parseDouble(parser, 0), - new ParseField(Fields.LOWER), ValueType.DOUBLE_OR_NULL); + new ParseField(Fields.UPPER), ValueType.DOUBLE_OR_NULL); STD_BOUNDS_PARSER.declareField(constructorArg(), (parser, context) -> parseDouble(parser, 0), - new ParseField(Fields.UPPER), ValueType.DOUBLE_OR_NULL); - STD_BOUNDS_AS_STRING_PARSER.declareString(constructorArg(), new ParseField(Fields.LOWER)); + new ParseField(Fields.LOWER), ValueType.DOUBLE_OR_NULL); + STD_BOUNDS_PARSER.declareField(optionalConstructorArg(), (parser, context) -> parseDouble(parser, 0), + new ParseField(Fields.UPPER_POPULATION), ValueType.DOUBLE_OR_NULL); + STD_BOUNDS_PARSER.declareField(optionalConstructorArg(), (parser, 
context) -> parseDouble(parser, 0), + new ParseField(Fields.LOWER_POPULATION), ValueType.DOUBLE_OR_NULL); + STD_BOUNDS_PARSER.declareField(optionalConstructorArg(), (parser, context) -> parseDouble(parser, 0), + new ParseField(Fields.UPPER_SAMPLING), ValueType.DOUBLE_OR_NULL); + STD_BOUNDS_PARSER.declareField(optionalConstructorArg(), (parser, context) -> parseDouble(parser, 0), + new ParseField(Fields.LOWER_SAMPLING), ValueType.DOUBLE_OR_NULL); STD_BOUNDS_AS_STRING_PARSER.declareString(constructorArg(), new ParseField(Fields.UPPER)); + STD_BOUNDS_AS_STRING_PARSER.declareString(constructorArg(), new ParseField(Fields.LOWER)); + STD_BOUNDS_AS_STRING_PARSER.declareString(optionalConstructorArg(), new ParseField(Fields.UPPER_POPULATION)); + STD_BOUNDS_AS_STRING_PARSER.declareString(optionalConstructorArg(), new ParseField(Fields.LOWER_POPULATION)); + STD_BOUNDS_AS_STRING_PARSER.declareString(optionalConstructorArg(), new ParseField(Fields.UPPER_SAMPLING)); + STD_BOUNDS_AS_STRING_PARSER.declareString(optionalConstructorArg(), new ParseField(Fields.LOWER_SAMPLING)); declareExtendedStatsFields(PARSER); } @@ -165,16 +297,32 @@ protected static void declareExtendedStatsFields(ObjectParser agg.variance = value, (parser, context) -> parseDouble(parser, 0), new ParseField(Fields.VARIANCE), ValueType.DOUBLE_OR_NULL); + objectParser.declareField((agg, value) -> agg.variancePopulation = value, (parser, context) -> parseDouble(parser, 0), + new ParseField(Fields.VARIANCE_POPULATION), ValueType.DOUBLE_OR_NULL); + objectParser.declareField((agg, value) -> agg.varianceSampling = value, (parser, context) -> parseDouble(parser, 0), + new ParseField(Fields.VARIANCE_SAMPLING), ValueType.DOUBLE_OR_NULL); objectParser.declareField((agg, value) -> agg.stdDeviation = value, (parser, context) -> parseDouble(parser, 0), new ParseField(Fields.STD_DEVIATION), ValueType.DOUBLE_OR_NULL); + objectParser.declareField((agg, value) -> agg.stdDeviationPopulation = value, (parser, context) -> 
parseDouble(parser, 0), + new ParseField(Fields.STD_DEVIATION_POPULATION), ValueType.DOUBLE_OR_NULL); + objectParser.declareField((agg, value) -> agg.stdDeviationSampling = value, (parser, context) -> parseDouble(parser, 0), + new ParseField(Fields.STD_DEVIATION_SAMPLING), ValueType.DOUBLE_OR_NULL); objectParser.declareObject(ParsedExtendedStats::setStdDeviationBounds, STD_BOUNDS_PARSER, new ParseField(Fields.STD_DEVIATION_BOUNDS)); objectParser.declareString((agg, value) -> agg.valueAsString.put(Fields.SUM_OF_SQRS_AS_STRING, value), new ParseField(Fields.SUM_OF_SQRS_AS_STRING)); objectParser.declareString((agg, value) -> agg.valueAsString.put(Fields.VARIANCE_AS_STRING, value), new ParseField(Fields.VARIANCE_AS_STRING)); + objectParser.declareString((agg, value) -> agg.valueAsString.put(Fields.VARIANCE_POPULATION_AS_STRING, value), + new ParseField(Fields.VARIANCE_POPULATION_AS_STRING)); + objectParser.declareString((agg, value) -> agg.valueAsString.put(Fields.VARIANCE_SAMPLING_AS_STRING, value), + new ParseField(Fields.VARIANCE_SAMPLING_AS_STRING)); objectParser.declareString((agg, value) -> agg.valueAsString.put(Fields.STD_DEVIATION_AS_STRING, value), new ParseField(Fields.STD_DEVIATION_AS_STRING)); + objectParser.declareString((agg, value) -> agg.valueAsString.put(Fields.STD_DEVIATION_POPULATION_AS_STRING, value), + new ParseField(Fields.STD_DEVIATION_POPULATION_AS_STRING)); + objectParser.declareString((agg, value) -> agg.valueAsString.put(Fields.STD_DEVIATION_SAMPLING_AS_STRING, value), + new ParseField(Fields.STD_DEVIATION_SAMPLING_AS_STRING)); objectParser.declareObject(ParsedExtendedStats::setStdDeviationBoundsAsString, STD_BOUNDS_AS_STRING_PARSER, new ParseField(Fields.STD_DEVIATION_BOUNDS_AS_STRING)); } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsAggregatorTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsAggregatorTests.java index 001f18e8eea47..9e781885e1eb4 
100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsAggregatorTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/ExtendedStatsAggregatorTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -56,7 +56,11 @@ public void testEmpty() throws IOException { assertEquals(Double.POSITIVE_INFINITY, stats.getMin(), 0); assertEquals(Double.NEGATIVE_INFINITY, stats.getMax(), 0); assertEquals(Double.NaN, stats.getVariance(), 0); + assertEquals(Double.NaN, stats.getVariancePopulation(), 0); + assertEquals(Double.NaN, stats.getVarianceSampling(), 0); assertEquals(Double.NaN, stats.getStdDeviation(), 0); + assertEquals(Double.NaN, stats.getStdDeviationPopulation(), 0); + assertEquals(Double.NaN, stats.getStdDeviationSampling(), 0); assertEquals(0d, stats.getSumOfSquares(), 0); assertFalse(AggregationInspectionHelper.hasValue(stats)); } @@ -91,11 +95,23 @@ public void testRandomDoubles() throws IOException { assertEquals(expected.sum / expected.count, stats.getAvg(), TOLERANCE); assertEquals(expected.sumOfSqrs, stats.getSumOfSquares(), TOLERANCE); assertEquals(expected.stdDev(), stats.getStdDeviation(), TOLERANCE); + assertEquals(expected.stdDevPopulation(), stats.getStdDeviationPopulation(), TOLERANCE); + assertEquals(expected.stdDevSampling(), stats.getStdDeviationSampling(), TOLERANCE); assertEquals(expected.variance(), stats.getVariance(), TOLERANCE); + assertEquals(expected.variancePopulation(), stats.getVariancePopulation(), TOLERANCE); + assertEquals(expected.varianceSampling(), stats.getVarianceSampling(), TOLERANCE); assertEquals(expected.stdDevBound(ExtendedStats.Bounds.LOWER, stats.getSigma()), 
stats.getStdDeviationBound(ExtendedStats.Bounds.LOWER), TOLERANCE); assertEquals(expected.stdDevBound(ExtendedStats.Bounds.UPPER, stats.getSigma()), stats.getStdDeviationBound(ExtendedStats.Bounds.UPPER), TOLERANCE); + assertEquals(expected.stdDevBound(ExtendedStats.Bounds.LOWER_POPULATION, stats.getSigma()), + stats.getStdDeviationBound(ExtendedStats.Bounds.LOWER_POPULATION), TOLERANCE); + assertEquals(expected.stdDevBound(ExtendedStats.Bounds.UPPER_POPULATION, stats.getSigma()), + stats.getStdDeviationBound(ExtendedStats.Bounds.UPPER_POPULATION), TOLERANCE); + assertEquals(expected.stdDevBound(ExtendedStats.Bounds.LOWER_SAMPLING, stats.getSigma()), + stats.getStdDeviationBound(ExtendedStats.Bounds.LOWER_SAMPLING), TOLERANCE); + assertEquals(expected.stdDevBound(ExtendedStats.Bounds.UPPER_SAMPLING, stats.getSigma()), + stats.getStdDeviationBound(ExtendedStats.Bounds.UPPER_SAMPLING), TOLERANCE); assertTrue(AggregationInspectionHelper.hasValue(stats)); } ); @@ -124,7 +140,11 @@ public void testVarianceNonNegative() throws IOException { stats -> { //since the value(49.95) is a constant, variance should be 0 assertEquals(0.0d, stats.getVariance(), TOLERANCE); + assertEquals(0.0d, stats.getVariancePopulation(), TOLERANCE); + assertEquals(0.0d, stats.getVarianceSampling(), TOLERANCE); assertEquals(0.0d, stats.getStdDeviation(), TOLERANCE); + assertEquals(0.0d, stats.getStdDeviationPopulation(), TOLERANCE); + assertEquals(0.0d, stats.getStdDeviationSampling(), TOLERANCE); } ); } @@ -156,11 +176,23 @@ public void testRandomLongs() throws IOException { assertEquals(expected.sum / expected.count, stats.getAvg(), TOLERANCE); assertEquals(expected.sumOfSqrs, stats.getSumOfSquares(), TOLERANCE); assertEquals(expected.stdDev(), stats.getStdDeviation(), TOLERANCE); + assertEquals(expected.stdDevPopulation(), stats.getStdDeviationPopulation(), TOLERANCE); + assertEquals(expected.stdDevSampling(), stats.getStdDeviationSampling(), TOLERANCE); assertEquals(expected.variance(), 
stats.getVariance(), TOLERANCE); + assertEquals(expected.variancePopulation(), stats.getVariancePopulation(), TOLERANCE); + assertEquals(expected.varianceSampling(), stats.getVarianceSampling(), TOLERANCE); assertEquals(expected.stdDevBound(ExtendedStats.Bounds.LOWER, stats.getSigma()), stats.getStdDeviationBound(ExtendedStats.Bounds.LOWER), TOLERANCE); assertEquals(expected.stdDevBound(ExtendedStats.Bounds.UPPER, stats.getSigma()), stats.getStdDeviationBound(ExtendedStats.Bounds.UPPER), TOLERANCE); + assertEquals(expected.stdDevBound(ExtendedStats.Bounds.LOWER_POPULATION, stats.getSigma()), + stats.getStdDeviationBound(ExtendedStats.Bounds.LOWER_POPULATION), TOLERANCE); + assertEquals(expected.stdDevBound(ExtendedStats.Bounds.UPPER_POPULATION, stats.getSigma()), + stats.getStdDeviationBound(ExtendedStats.Bounds.UPPER_POPULATION), TOLERANCE); + assertEquals(expected.stdDevBound(ExtendedStats.Bounds.LOWER_SAMPLING, stats.getSigma()), + stats.getStdDeviationBound(ExtendedStats.Bounds.LOWER_SAMPLING), TOLERANCE); + assertEquals(expected.stdDevBound(ExtendedStats.Bounds.UPPER_SAMPLING, stats.getSigma()), + stats.getStdDeviationBound(ExtendedStats.Bounds.UPPER_SAMPLING), TOLERANCE); assertTrue(AggregationInspectionHelper.hasValue(stats)); } ); @@ -257,17 +289,45 @@ void add(double value) { return Math.sqrt(variance()); } + double stdDevPopulation() { + return Math.sqrt(variancePopulation()); + } + + double stdDevSampling() { + return Math.sqrt(varianceSampling()); + } + double stdDevBound(ExtendedStats.Bounds bounds, double sigma) { - if (bounds == ExtendedStats.Bounds.UPPER) { - return (sum / count) + (Math.sqrt(variance()) * sigma); - } else { - return (sum / count) - (Math.sqrt(variance()) * sigma); + switch (bounds) { + case UPPER: + return (sum / count) + (Math.sqrt(variance()) * sigma); + case UPPER_POPULATION: + return (sum / count) + (Math.sqrt(variancePopulation()) * sigma); + case UPPER_SAMPLING: + return (sum / count) + (Math.sqrt(varianceSampling()) * 
sigma); + case LOWER: + return (sum / count) - (Math.sqrt(variance()) * sigma); + case LOWER_POPULATION: + return (sum / count) - (Math.sqrt(variancePopulation()) * sigma); + case LOWER_SAMPLING: + return (sum / count) - (Math.sqrt(varianceSampling()) * sigma); + default: + throw new IllegalArgumentException("Unknown bound " + bounds); } } double variance() { + return variancePopulation(); + } + + double variancePopulation() { double variance = (sumOfSqrs - ((sum * sum) / count)) / count; return variance < 0 ? 0 : variance; } + + double varianceSampling() { + double variance = (sumOfSqrs - ((sum * sum) / count)) / (count - 1); + return variance < 0 ? 0 : variance; + } } } diff --git a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/InternalExtendedStatsTests.java b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/InternalExtendedStatsTests.java index 4966e97eba57f..927c7f37d50a1 100644 --- a/server/src/test/java/org/elasticsearch/search/aggregations/metrics/InternalExtendedStatsTests.java +++ b/server/src/test/java/org/elasticsearch/search/aggregations/metrics/InternalExtendedStatsTests.java @@ -7,7 +7,7 @@ * not use this file except in compliance with the License. * You may obtain a copy of the License at * - * http://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an @@ -96,16 +96,40 @@ protected void assertFromXContent(InternalExtendedStats aggregation, ParsedAggre // for count == 0, fields are rendered as `null`, so we test that we parse to default values used also in the reduce phase assertEquals(count > 0 ? aggregation.getSumOfSquares() : 0 , parsed.getSumOfSquares(), 0); assertEquals(count > 0 ? aggregation.getVariance() : 0 , parsed.getVariance(), 0); + assertEquals(count > 0 ? 
aggregation.getVariancePopulation() : 0 , parsed.getVariancePopulation(), 0); + assertEquals(count > 0 ? aggregation.getVarianceSampling() : 0 , parsed.getVarianceSampling(), 0); assertEquals(count > 0 ? aggregation.getStdDeviation() : 0 , parsed.getStdDeviation(), 0); + assertEquals(count > 0 ? aggregation.getStdDeviationPopulation() : 0 , parsed.getStdDeviationPopulation(), 0); + assertEquals(count > 0 ? aggregation.getStdDeviationSampling() : 0 , parsed.getStdDeviationSampling(), 0); assertEquals(count > 0 ? aggregation.getStdDeviationBound(Bounds.LOWER) : 0 , parsed.getStdDeviationBound(Bounds.LOWER), 0); assertEquals(count > 0 ? aggregation.getStdDeviationBound(Bounds.UPPER) : 0 , parsed.getStdDeviationBound(Bounds.UPPER), 0); + assertEquals(count > 0 ? aggregation.getStdDeviationBound(Bounds.LOWER_POPULATION) : 0 , + parsed.getStdDeviationBound(Bounds.LOWER_POPULATION), 0); + assertEquals(count > 0 ? aggregation.getStdDeviationBound(Bounds.UPPER_POPULATION) : 0 , + parsed.getStdDeviationBound(Bounds.UPPER_POPULATION), 0); + assertEquals(count > 0 ? aggregation.getStdDeviationBound(Bounds.LOWER_SAMPLING) : 0 , + parsed.getStdDeviationBound(Bounds.LOWER_SAMPLING), 0); + assertEquals(count > 0 ? 
aggregation.getStdDeviationBound(Bounds.UPPER_SAMPLING) : 0 , + parsed.getStdDeviationBound(Bounds.UPPER_SAMPLING), 0); // also as_string values are only rendered for count != 0 if (count > 0) { assertEquals(aggregation.getSumOfSquaresAsString(), parsed.getSumOfSquaresAsString()); assertEquals(aggregation.getVarianceAsString(), parsed.getVarianceAsString()); + assertEquals(aggregation.getVariancePopulationAsString(), parsed.getVariancePopulationAsString()); + assertEquals(aggregation.getVarianceSamplingAsString(), parsed.getVarianceSamplingAsString()); assertEquals(aggregation.getStdDeviationAsString(), parsed.getStdDeviationAsString()); + assertEquals(aggregation.getStdDeviationPopulationAsString(), parsed.getStdDeviationPopulationAsString()); + assertEquals(aggregation.getStdDeviationSamplingAsString(), parsed.getStdDeviationSamplingAsString()); assertEquals(aggregation.getStdDeviationBoundAsString(Bounds.LOWER), parsed.getStdDeviationBoundAsString(Bounds.LOWER)); assertEquals(aggregation.getStdDeviationBoundAsString(Bounds.UPPER), parsed.getStdDeviationBoundAsString(Bounds.UPPER)); + assertEquals(aggregation.getStdDeviationBoundAsString(Bounds.LOWER_POPULATION), + parsed.getStdDeviationBoundAsString(Bounds.LOWER_POPULATION)); + assertEquals(aggregation.getStdDeviationBoundAsString(Bounds.UPPER_POPULATION), + parsed.getStdDeviationBoundAsString(Bounds.UPPER_POPULATION)); + assertEquals(aggregation.getStdDeviationBoundAsString(Bounds.LOWER_SAMPLING), + parsed.getStdDeviationBoundAsString(Bounds.LOWER_SAMPLING)); + assertEquals(aggregation.getStdDeviationBoundAsString(Bounds.UPPER_SAMPLING), + parsed.getStdDeviationBoundAsString(Bounds.UPPER_SAMPLING)); } }