elastic · pcsanwald · Jul 30, 2018 · Jul 13, 2018 · Jul 13, 2018 · Jul 16, 2018
diff --git a/client/rest-high-level/src/main/java/org/elasticsearch/client/RestHighLevelClient.java b/client/rest-high-level/src/main/java/org/elasticsearch/client/RestHighLevelClient.java
@@ -85,8 +85,10 @@
 import org.elasticsearch.search.aggregations.bucket.geogrid.ParsedGeoHashGrid;
 import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.global.ParsedGlobal;
+import org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
+import org.elasticsearch.search.aggregations.bucket.histogram.ParsedAutoDateHistogram;
 import org.elasticsearch.search.aggregations.bucket.histogram.ParsedDateHistogram;
 import org.elasticsearch.search.aggregations.bucket.histogram.ParsedHistogram;
 import org.elasticsearch.search.aggregations.bucket.missing.MissingAggregationBuilder;
@@ -1347,6 +1349,7 @@ static List<NamedXContentRegistry.Entry> getDefaultNamedXContents() {
         map.put(GeoCentroidAggregationBuilder.NAME, (p, c) -> ParsedGeoCentroid.fromXContent(p, (String) c));
         map.put(HistogramAggregationBuilder.NAME, (p, c) -> ParsedHistogram.fromXContent(p, (String) c));
         map.put(DateHistogramAggregationBuilder.NAME, (p, c) -> ParsedDateHistogram.fromXContent(p, (String) c));
+        map.put(AutoDateHistogramAggregationBuilder.NAME, (p, c) -> ParsedAutoDateHistogram.fromXContent(p, (String) c));
         map.put(StringTerms.NAME, (p, c) -> ParsedStringTerms.fromXContent(p, (String) c));
         map.put(LongTerms.NAME, (p, c) -> ParsedLongTerms.fromXContent(p, (String) c));
         map.put(DoubleTerms.NAME, (p, c) -> ParsedDoubleTerms.fromXContent(p, (String) c));

diff --git a/docs/reference/aggregations/bucket.asciidoc b/docs/reference/aggregations/bucket.asciidoc
@@ -19,6 +19,8 @@ setting named `search.max_buckets`. It is disabled by default (-1) but requests
 
 include::bucket/adjacency-matrix-aggregation.asciidoc[]
 
+include::bucket/autodatehistogram-aggregation.asciidoc[]
+
 include::bucket/children-aggregation.asciidoc[]
 
 include::bucket/composite-aggregation.asciidoc[]

diff --git a/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc b/docs/reference/aggregations/bucket/autodatehistogram-aggregation.asciidoc
@@ -0,0 +1,283 @@
+[[search-aggregations-bucket-autodatehistogram-aggregation]]
+=== Auto-interval Date Histogram Aggregation
+
+A multi-bucket aggregation similar to the <<search-aggregations-bucket-datehistogram-aggregation>> except 
+instead of providing an interval to use as the width of each bucket, a target number of buckets is provided
+indicating the number of buckets needed and the interval of the buckets is automatically chosen to best achieve
+that target. The number of buckets returned will always be less than or equal to this target number.
+
+The `buckets` field is optional, and will default to 10 buckets if not specified.
+
+Requesting a target of 10 buckets.
+
+[source,js]
+--------------------------------------------------
+POST /sales/_search?size=0
+{
+    "aggs" : {
+        "sales_over_time" : {
+            "auto_date_histogram" : {
+                "field" : "date",
+                "buckets" : 10
+            }
+        }
+    }
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[setup:sales]
+
+==== Keys
+
+Internally, a date is represented as a 64 bit number representing a timestamp
+in milliseconds-since-the-epoch. These timestamps are returned as the bucket
+++key++s. The `key_as_string` is the same timestamp converted to a formatted
+date string using the format specified with the `format` parameter:
+
+TIP: If no `format` is specified, then it will use the first date
+<<mapping-date-format,format>> specified in the field mapping.
+
+[source,js]
+--------------------------------------------------
+POST /sales/_search?size=0
+{
+    "aggs" : {
+        "sales_over_time" : {
+            "auto_date_histogram" : {
+                "field" : "date",
+                "buckets" : 5,
+                "format" : "yyyy-MM-dd" <1>
+            }
+        }
+    }
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[setup:sales]
+
+<1> Supports expressive date <<date-format-pattern,format pattern>>
+
+Response:
+
+[source,js]
+--------------------------------------------------
+{
+    ...
+    "aggregations": {
+        "sales_over_time": {
+            "buckets": [
+                {
+                    "key_as_string": "2015-01-01",
+                    "key": 1420070400000,
+                    "doc_count": 3
+                },
+                {
+                    "key_as_string": "2015-02-01",
+                    "key": 1422748800000,
+                    "doc_count": 2
+                },
+                {
+                    "key_as_string": "2015-03-01",
+                    "key": 1425168000000,
+                    "doc_count": 2
+                }
+            ]
+        }
+    }
+}
+--------------------------------------------------
+// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
+
+==== Intervals
+
+The interval of the returned buckets is selected based on the data collected by the 
+aggregation so that the number of buckets returned is less than or equal to the number 
+requested. The possible intervals returned are:
+
+[horizontal]
+seconds::      In multiples of 1, 5, 10 and 30
+minutes::      In multiples of 1, 5, 10 and 30
+hours::        In multiples of 1, 3 and 12
+days::         In multiples of 1 and 7
+months::       In multiples of 1 and 3
+years::        In multiples of 1, 5, 10, 20, 50 and 100
+
+In the worst case, where the number of daily buckets is too large for the requested
+number of buckets, the number of buckets returned will be 1/7th of the number of
+buckets requested.
+
+==== Time Zone
+
+Date-times are stored in Elasticsearch in UTC.  By default, all bucketing and
+rounding is also done in UTC. The `time_zone` parameter can be used to indicate
+that bucketing should use a different time zone.
+
+Time zones may either be specified as an ISO 8601 UTC offset (e.g. `+01:00` or
+`-08:00`)  or as a timezone id, an identifier used in the TZ database like
+`America/Los_Angeles`.
+
+Consider the following example:
+
+[source,js]
+---------------------------------
+PUT my_index/log/1?refresh
+{
+  "date": "2015-10-01T00:30:00Z"
+}
+
+PUT my_index/log/2?refresh
+{
+  "date": "2015-10-01T01:30:00Z"
+}
+
+PUT my_index/log/3?refresh
+{
+  "date": "2015-10-01T02:30:00Z"
+}
+
+GET my_index/_search?size=0
+{
+  "aggs": {
+    "by_day": {
+      "auto_date_histogram": {
+        "field":     "date",
+        "buckets" : 3
+      }
+    }
+  }
+}
+---------------------------------
+// CONSOLE
+
+UTC is used if no time zone is specified, so three 1-hour buckets are returned
+starting at midnight UTC on 1 October 2015:
+
+[source,js]
+---------------------------------
+{
+  ...
+  "aggregations": {
+    "by_day": {
+      "buckets": [
+        {
+          "key_as_string": "2015-10-01T00:00:00.000Z",
+          "key": 1443657600000,
+          "doc_count": 1
+        },
+        {
+          "key_as_string": "2015-10-01T01:00:00.000Z",
+          "key": 1443661200000,
+          "doc_count": 1
+        },
+        {
+          "key_as_string": "2015-10-01T02:00:00.000Z",
+          "key": 1443664800000,
+          "doc_count": 1
+        }
+      ]
+    }
+  }
+}
+---------------------------------
+// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
+
+If a `time_zone` of `-01:00` is specified, then midnight starts at one hour before
+midnight UTC:
+
+[source,js]
+---------------------------------
+GET my_index/_search?size=0
+{
+  "aggs": {
+    "by_day": {
+      "auto_date_histogram": {
+        "field":     "date",
+        "buckets" : 3,
+        "time_zone": "-01:00"
+      }
+    }
+  }
+}
+---------------------------------
+// CONSOLE
+// TEST[continued]
+
+
+Now three 1-hour buckets are still returned but the first bucket starts at 
+11:00pm on 30 September 2015 since that is the local time for the bucket in 
+the specified time zone.
+
+[source,js]
+---------------------------------
+{
+  ...
+  "aggregations": {
+    "by_day": {
+      "buckets": [
+        {
+          "key_as_string": "2015-09-30T23:00:00.000-01:00", <1>
+          "key": 1443657600000,
+          "doc_count": 1
+        },
+        {
+          "key_as_string": "2015-10-01T00:00:00.000-01:00",
+          "key": 1443661200000,
+          "doc_count": 1
+        },
+        {
+          "key_as_string": "2015-10-01T01:00:00.000-01:00",
+          "key": 1443664800000,
+          "doc_count": 1
+        }
+      ]
+    }
+  }
+}
+---------------------------------
+// TESTRESPONSE[s/\.\.\./"took": $body.took,"timed_out": false,"_shards": $body._shards,"hits": $body.hits,/]
+
+<1> The `key_as_string` value represents the start of each hourly bucket
+    in the specified time zone.
+
+WARNING: When using time zones that follow DST (daylight saving time) changes,
+buckets close to the moment when those changes happen can have slightly different
+sizes than neighbouring buckets.
+For example, consider a DST start in the `CET` time zone: on 27 March 2016 at 2am,
+clocks were turned forward 1 hour to 3am local time. If the result of the aggregation
+was daily buckets, the bucket covering that day will only hold data for 23 hours
+instead of the usual 24 hours for other buckets. The same is true for shorter intervals
+such as 12h. Here, we will have only an 11h bucket on the morning of 27 March when the
+DST shift happens.
+
+==== Scripts
+
+Like with the normal <<search-aggregations-bucket-datehistogram-aggregation, `date_histogram`>>, both document level
+scripts and value level scripts are supported. This aggregation does not, however, support the `min_doc_count`,
+`extended_bounds` and `order` parameters.
+
+==== Missing value
+
+The `missing` parameter defines how documents that are missing a value should be treated.
+By default they will be ignored but it is also possible to treat them as if they
+had a value.
+
+[source,js]
+--------------------------------------------------
+POST /sales/_search?size=0
+{
+    "aggs" : {
+        "sale_date" : {
+             "auto_date_histogram" : {
+                 "field" : "date",
+                 "buckets": 10,
+                 "missing": "2000/01/01" <1>
+             }
+         }
+    }
+}
+--------------------------------------------------
+// CONSOLE
+// TEST[setup:sales]
+
+<1> Documents without a value in the `date` field will fall into the same bucket as documents that have the value `2000-01-01`.
+
diff --git a/server/src/main/java/org/elasticsearch/search/SearchModule.java b/server/src/main/java/org/elasticsearch/search/SearchModule.java
@@ -109,8 +109,10 @@
 import org.elasticsearch.search.aggregations.bucket.geogrid.InternalGeoHashGrid;
 import org.elasticsearch.search.aggregations.bucket.global.GlobalAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.global.InternalGlobal;
+import org.elasticsearch.search.aggregations.bucket.histogram.AutoDateHistogramAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.histogram.DateHistogramAggregationBuilder;
 import org.elasticsearch.search.aggregations.bucket.histogram.HistogramAggregationBuilder;
+import org.elasticsearch.search.aggregations.bucket.histogram.InternalAutoDateHistogram;
 import org.elasticsearch.search.aggregations.bucket.histogram.InternalDateHistogram;
 import org.elasticsearch.search.aggregations.bucket.histogram.InternalHistogram;
 import org.elasticsearch.search.aggregations.bucket.missing.InternalMissing;
@@ -396,6 +398,8 @@ private void registerAggregations(List<SearchPlugin> plugins) {
                 HistogramAggregationBuilder::parse).addResultReader(InternalHistogram::new));
         registerAggregation(new AggregationSpec(DateHistogramAggregationBuilder.NAME, DateHistogramAggregationBuilder::new,
                 DateHistogramAggregationBuilder::parse).addResultReader(InternalDateHistogram::new));
+        registerAggregation(new AggregationSpec(AutoDateHistogramAggregationBuilder.NAME, AutoDateHistogramAggregationBuilder::new,
+                AutoDateHistogramAggregationBuilder::parse).addResultReader(InternalAutoDateHistogram::new));
         registerAggregation(new AggregationSpec(GeoDistanceAggregationBuilder.NAME, GeoDistanceAggregationBuilder::new,
                 GeoDistanceAggregationBuilder::parse).addResultReader(InternalGeoDistance::new));
         registerAggregation(new AggregationSpec(GeoGridAggregationBuilder.NAME, GeoGridAggregationBuilder::new,

diff --git a/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java b/server/src/main/java/org/elasticsearch/search/aggregations/bucket/BucketsAggregator.java
@@ -84,6 +84,19 @@ public final void collectExistingBucket(LeafBucketCollector subCollector, int do
         subCollector.collect(doc, bucketOrd);
     }
 
+    public final void mergeBuckets(long[] mergeMap, long newNumBuckets) { // old bucket i is merged into new bucket mergeMap[i]
+        try (IntArray oldDocCounts = docCounts) { // the previous counts array is closed when this block exits
+            docCounts = bigArrays.newIntArray(newNumBuckets, true); // replacement array sized for newNumBuckets
+            docCounts.fill(0, newNumBuckets, 0); // start every new bucket at a count of 0
+            for (int i = 0; i < oldDocCounts.size(); i++) { // walk every slot of the old array
+                int docCount = oldDocCounts.get(i);
+                if (docCount != 0) { // empty old buckets are skipped, so mergeMap[i] is never read for them
+                    docCounts.increment(mergeMap[i], docCount); // add this old bucket's count to its target bucket
+                }
+            }
+        }
+    }
+
     public IntArray getDocCounts() {
         return docCounts;
     }