Skip to content

Commit

Permalink
Scale doubles to floats when necessary to match the field (#78344)
Browse files Browse the repository at this point in the history
This fixes a bug where the range aggregation always treats the range end points as doubles, even if the field value doesn't have enough resolution to fill a double. This was creating issues where the range would have a "more precise" approximation of an unrepresentable number than the field, causing the value to fall in the wrong bucket.

Note 1: This does not resolve the case where we have a long value that is not precisely representable as a double. Since the wire format sends the range bounds as doubles, by the time we get to where this fix is operating, we've already lost the precision to act on a big long. Fixing that problem will require a wire format change, and I'm not convinced it's worth it right now.

Note 2: This is probably still broken for ScaledFloats, since they don't implement NumberFieldType.

Resolves #77033
  • Loading branch information
not-napoleon authored Oct 11, 2021
1 parent d713c95 commit b968fcb
Show file tree
Hide file tree
Showing 7 changed files with 173 additions and 43 deletions.
2 changes: 2 additions & 0 deletions rest-api-spec/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ tasks.named("yamlRestTestV7CompatTransform").configure { task ->
task.skipTest("search.aggregation/20_terms/string profiler via map", "The profiler results aren't backwards compatible.")
task.skipTest("search.aggregation/20_terms/numeric profiler", "The profiler results aren't backwards compatible.")

task.skipTest("msearch/20_typed_keys/Multisearch test with typed_keys parameter", "Test contains a no longer valid work around for the float range bug")

task.replaceValueInMatch("_type", "_doc")
task.addAllowedWarningRegex("\\[types removal\\].*")
task.replaceValueInMatch("nodes.\$node_id.roles.8", "ml", "node_info role test")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ setup:
- index: test-*
- {query: {match: {bool: true} }, size: 0, aggs: {test_filter: {filter: {range: {integer: {gte: 20} } } } } }
- index: test-1
- {query: {match_all: {} }, size: 0, aggs: {test_range: {range: {field: float, ranges: [ {to: 19.2499999}, {from: 19.25} ] } } } }
- {query: {match_all: {} }, size: 0, aggs: {test_range: {range: {field: float, ranges: [ {to: 19.25}, {from: 19.25} ] } } } }
- index: test-*
- {query: {bool: {filter: {range: {row: {lt: 5}}} } }, size: 0, aggs: {test_percentiles: {percentiles: {field: float} } } }
# Testing suggesters
Expand All @@ -78,7 +78,7 @@ setup:
- match: { responses.0.hits.total: 3 }
- match: { responses.0.aggregations.filter#test_filter.doc_count : 2 }
- match: { responses.1.hits.total: 3 }
- match: { responses.1.aggregations.range#test_range.buckets.0.key : "*-19.2499999" }
- match: { responses.1.aggregations.range#test_range.buckets.0.key : "*-19.25" }
- match: { responses.1.aggregations.range#test_range.buckets.0.doc_count : 2 }
- match: { responses.1.aggregations.range#test_range.buckets.1.key : "19.25-*" }
- match: { responses.1.aggregations.range#test_range.buckets.1.doc_count : 1 }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ setup:
type: double
long:
type: long
float:
type: float
half_float:
type: half_float

- do:
cluster.health:
Expand All @@ -22,15 +26,71 @@ setup:
refresh: true
body:
- {"index": {}}
- { "double" : 42.1, "long": 25 }
- { "double" : 42.1, "long": 25, "float": 0.01, "half_float": 0.01 }
- {"index": {}}
- { "double" : 100.7, "long": 80 }
- { "double" : 100.7, "long": 80, "float": 0.03, "half_float": 0.0152 }
- {"index": {}}
- { "double" : 50.5, "long": 75}
- { "double" : 50.5, "long": 75, "float": 0.04, "half_float": 0.04 }
# For testing missing values
- {"index": {}}
- {}

---
"Float Endpoint Exclusive":
- skip:
version: " - 7.99.99"
reason: Bug fixed in 7.16.0 (backport pending)
- do:
search:
body:
size: 0
aggs:
double_range:
range:
format: "0.0#"
field: "float"
ranges:
-
from: 0
to: 0.04
- from: 0.04
to: 1.0
- match: { hits.total.relation: "eq" }
- match: { hits.total.value: 4 }
- length: { aggregations.double_range.buckets: 2 }
- match: { aggregations.double_range.buckets.0.key: "0.0-0.04" }
- match: { aggregations.double_range.buckets.0.doc_count: 2 }
- match: { aggregations.double_range.buckets.1.key: "0.04-1.0" }
- match: { aggregations.double_range.buckets.1.doc_count: 1 }

---
"Half Float Endpoint Exclusive":
- skip:
version: " - 7.99.99"
reason: Bug fixed in 7.16.0 (backport pending)
- do:
search:
body:
size: 0
aggs:
double_range:
range:
format: "0.0###"
field: "half_float"
ranges:
-
from: 0
to: 0.0152
- from: 0.0152
to: 1.0
- match: { hits.total.relation: "eq" }
- match: { hits.total.value: 4 }
- length: { aggregations.double_range.buckets: 2 }
- match: { aggregations.double_range.buckets.0.key: "0.0-0.0152" }
- match: { aggregations.double_range.buckets.0.doc_count: 1 }
- match: { aggregations.double_range.buckets.1.key: "0.0152-1.0" }
- match: { aggregations.double_range.buckets.1.doc_count: 2 }

---
"Double range":
- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,21 @@ public String typeName() {
return type.name;
}

/**
 * Re-parses {@code value} at the precision of the backing number type and widens
 * the result back to a double. Mapping a double to a double looks like a no-op,
 * but narrower field types (e.g. float, half_float) approximate unrepresentable
 * decimals differently than a parsed double does — for instance 0.04F &lt; 0.04D —
 * which would otherwise place values in the wrong range-aggregation bucket.
 */
public double reduceToStoredPrecision(double value) {
    // Integer-backed types throw when asked to parse an infinity, so infinite
    // inputs are passed through unchanged.
    return Double.isInfinite(value) ? value : type.parse(value, false).doubleValue();
}

public NumericType numericType() {
return type.numericType();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.function.DoubleUnaryOperator;

public class RangeAggregationBuilder extends AbstractRangeBuilder<RangeAggregationBuilder, Range> {
public static final String NAME = "range";
Expand Down Expand Up @@ -152,12 +153,18 @@ protected RangeAggregatorFactory innerBuild(
) throws IOException {
RangeAggregatorSupplier aggregatorSupplier = context.getValuesSourceRegistry().getAggregator(REGISTRY_KEY, config);

/*
This will downgrade the precision of the range bounds to match the field's precision. Fixes float/double issues, but not
long/double issues. See https://github.com/elastic/elasticsearch/issues/77033
*/
DoubleUnaryOperator fixPrecision = config.reduceToStoredPrecisionFunction();

// We need to call processRanges here so they are parsed before we make the decision of whether to cache the request
Range[] ranges = processRanges(range -> {
DocValueFormat parser = config.format();
assert parser != null;
Double from = range.from;
Double to = range.to;
Double from = fixPrecision.applyAsDouble(range.from);
Double to = fixPrecision.applyAsDouble(range.to);
if (range.fromAsStr != null) {
from = parser.parseDouble(range.fromAsStr, false, context::nowInMillis);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.RangeFieldMapper;
import org.elasticsearch.script.AggregationScript;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.DocValueFormat;

import java.io.IOException;
import java.time.ZoneId;
import java.util.function.DoubleUnaryOperator;
import java.util.function.Function;

/**
Expand Down Expand Up @@ -321,6 +323,17 @@ public FieldContext fieldContext() {
return fieldContext;
}

/**
 * Returns an operator that maps a double to the value it would have had if it
 * had been parsed by this source's field mapper and then widened back to a
 * double. Used to correct precision mismatches between range bounds (always
 * doubles on the wire) and narrower numeric field types. Falls back to the
 * identity function when no mapped number field is available.
 */
public DoubleUnaryOperator reduceToStoredPrecisionFunction() {
    MappedFieldType mappedType = fieldType();
    if (fieldContext() == null || (mappedType instanceof NumberFieldMapper.NumberFieldType) == false) {
        // Unmapped field, script, or non-number mapper: nothing to scale.
        return DoubleUnaryOperator.identity();
    }
    return ((NumberFieldMapper.NumberFieldType) mappedType)::reduceToStoredPrecision;
}

/**
* Convenience method for looking up the mapped field type backing this values source, if it exists.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,11 @@
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
Expand Down Expand Up @@ -103,6 +100,69 @@ public void testMatchesNumericDocValues() throws IOException {
});
}

/**
 * A doc holding exactly the shared endpoint of two adjacent ranges on a double
 * field must follow the half-open interval rule: excluded from the bucket whose
 * {@code to} it equals, included in the bucket whose {@code from} it equals.
 */
public void testDoubleRangesExclusiveEndpoint() throws IOException {
    final String fieldName = "double";
    final MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(fieldName, NumberType.DOUBLE);
    final RangeAggregationBuilder builder = new RangeAggregationBuilder("range").field(fieldName)
        .addRange("r1", 0, 0.04D)
        .addRange("r2", 0.04D, 1.0D);
    testCase(builder, new MatchAllDocsQuery(), iw -> {
        // Single doc exactly on the boundary value 0.04
        iw.addDocument(List.of(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(0.04D))));
    }, result -> {
        InternalRange<?, ?> agg = (InternalRange<?, ?>) result;
        List<? extends InternalRange.Bucket> buckets = agg.getBuckets();
        assertEquals(2, buckets.size());
        // Boundary doc is excluded from [0, 0.04) ...
        assertEquals(0, buckets.get(0).getDocCount());
        // ... and included in [0.04, 1.0)
        assertEquals(1, buckets.get(1).getDocCount());
    }, fieldType);
}

/**
 * 0.04 is not exactly representable in binary; the float approximation differs
 * from the double approximation. Verifies that a float-field doc storing 0.04F
 * still obeys the half-open interval rule when the range bounds arrive as
 * doubles (i.e. the bounds are scaled to float precision first).
 */
public void testFloatRangesExclusiveEndpoint() throws IOException {
    final String fieldName = "float";
    final MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(fieldName, NumberType.FLOAT);
    final RangeAggregationBuilder builder = new RangeAggregationBuilder("range").field(fieldName)
        .addRange("r1", 0, 0.04D)
        .addRange("r2", 0.04D, 1.0D);
    testCase(builder, new MatchAllDocsQuery(), iw -> {
        // Single doc exactly on the (float) boundary value
        iw.addDocument(NumberType.FLOAT.createFields(fieldName, 0.04F, false, true, false));
    }, result -> {
        InternalRange<?, ?> agg = (InternalRange<?, ?>) result;
        List<? extends InternalRange.Bucket> buckets = agg.getBuckets();
        assertEquals(2, buckets.size());
        // Excluded from the bucket ending at 0.04 ...
        assertEquals(0, buckets.get(0).getDocCount());
        // ... included in the bucket starting at 0.04
        assertEquals(1, buckets.get(1).getDocCount());
    }, fieldType);
}

/**
 * Same half-open interval check as the float case, but for half_float, whose
 * 11-bit mantissa gives an even coarser approximation of 0.0152 than either
 * float or double.
 */
public void testHalfFloatRangesExclusiveEndpoint() throws IOException {
    final String fieldName = "halfFloat";
    final MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(fieldName, NumberType.HALF_FLOAT);
    final RangeAggregationBuilder builder = new RangeAggregationBuilder("range").field(fieldName)
        .addRange("r1", 0, 0.0152D)
        .addRange("r2", 0.0152D, 1.0D);
    testCase(builder, new MatchAllDocsQuery(), iw -> {
        // Single doc exactly on the (half_float) boundary value
        iw.addDocument(NumberType.HALF_FLOAT.createFields(fieldName, 0.0152F, false, true, false));
    }, result -> {
        InternalRange<?, ?> agg = (InternalRange<?, ?>) result;
        List<? extends InternalRange.Bucket> buckets = agg.getBuckets();
        assertEquals(2, buckets.size());
        // Excluded from the bucket ending at 0.0152 ...
        assertEquals(0, buckets.get(0).getDocCount());
        // ... included in the bucket starting at 0.0152
        assertEquals(1, buckets.get(1).getDocCount());
    }, fieldType);
}

public void testUnboundedRanges() throws IOException {
testCase(
new RangeAggregationBuilder("name").field(NUMBER_FIELD_NAME).addUnboundedTo(5).addUnboundedFrom(5),
Expand Down Expand Up @@ -174,7 +234,7 @@ public void testDateFieldMillisecondResolution() throws IOException {
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(List.of(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli1), new LongPoint(DATE_FIELD_NAME, milli1)));
iw.addDocument(List.of(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli2), new LongPoint(DATE_FIELD_NAME, milli2)));
}, range -> {
}, (Consumer<InternalRange<?, ?>>) range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(1, ranges.size());
assertEquals(1, ranges.get(0).getDocCount());
Expand Down Expand Up @@ -205,7 +265,7 @@ public void testDateFieldNanosecondResolution() throws IOException {
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, TimeUnit.MILLISECONDS.toNanos(milli1))));
iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, TimeUnit.MILLISECONDS.toNanos(milli2))));
}, range -> {
}, (Consumer<InternalRange<?, ?>>) range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(1, ranges.size());
assertEquals(1, ranges.get(0).getDocCount());
Expand Down Expand Up @@ -239,7 +299,7 @@ public void testMissingDateWithDateNanosField() throws IOException {
iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, TimeUnit.MILLISECONDS.toNanos(milli2))));
// Missing will apply to this document
iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 7)));
}, range -> {
}, (Consumer<InternalRange<?, ?>>) range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(1, ranges.size());
assertEquals(2, ranges.get(0).getDocCount());
Expand Down Expand Up @@ -273,7 +333,7 @@ public void testNotFitIntoDouble() throws IOException {
for (long l = start; l < start + 150; l++) {
iw.addDocument(List.of(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, l), new LongPoint(NUMBER_FIELD_NAME, l)));
}
}, range -> {
}, (Consumer<InternalRange<?, ?>>) range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertThat(ranges, hasSize(3));
// If we had a native `double` range aggregator we'd get 50, 50, 50
Expand Down Expand Up @@ -305,7 +365,7 @@ public void testUnmappedWithMissingNumber() throws IOException {
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 7)));
iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 1)));
}, range -> {
}, (Consumer<InternalRange<?, ?>>) range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(1, ranges.size());
assertEquals(2, ranges.get(0).getDocCount());
Expand Down Expand Up @@ -542,31 +602,4 @@ private void simpleTestCase(
iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 3)));
}, verify, fieldType);
}

/**
 * Builds an in-memory Lucene index via {@code buildIndex}, runs {@code query}
 * with the given range aggregation over it, and hands the reduced result to
 * {@code verify}.
 */
private void testCase(
    RangeAggregationBuilder aggregationBuilder,
    Query query,
    CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
    Consumer<InternalRange<? extends InternalRange.Bucket, ? extends InternalRange<?, ?>>> verify,
    MappedFieldType fieldType
) throws IOException {
    try (Directory dir = newDirectory()) {
        RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
        buildIndex.accept(writer);
        writer.close();

        try (IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = newSearcher(reader, true, true);
            InternalRange<? extends InternalRange.Bucket, ? extends InternalRange<?, ?>> result = searchAndReduce(
                searcher,
                query,
                aggregationBuilder,
                fieldType
            );
            verify.accept(result);
        }
    }
}
}

0 comments on commit b968fcb

Please sign in to comment.