Skip to content

Commit

Permalink
Scale doubles to floats when necessary to match the field (#78344)
Browse files Browse the repository at this point in the history
This fixes a bug where the range aggregation always treats the range end points as doubles, even if the field value doesn't have enough resolution to fill a double. This was creating issues where the range would have a "more precise" approximation of an unrepresentable number than the field, causing the value to fall in the wrong bucket.

Note 1: This does not resolve the case where we have a long value that is not precisely representable as a double. Since the wire format sends the range bounds as doubles, by the time we get to where this fix is operating, we've already lost the precision to act on a big long. Fixing that problem will require a wire format change, and I'm not convinced it's worth it right now.

Note 2: This is probably still broken for ScaledFloats, since they don't implement NumberFieldType.

Resolves #77033
  • Loading branch information
not-napoleon authored Oct 11, 2021
1 parent d713c95 commit b968fcb
Show file tree
Hide file tree
Showing 7 changed files with 173 additions and 43 deletions.
2 changes: 2 additions & 0 deletions rest-api-spec/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,8 @@ tasks.named("yamlRestTestV7CompatTransform").configure { task ->
task.skipTest("search.aggregation/20_terms/string profiler via map", "The profiler results aren't backwards compatible.")
task.skipTest("search.aggregation/20_terms/numeric profiler", "The profiler results aren't backwards compatible.")

task.skipTest("msearch/20_typed_keys/Multisearch test with typed_keys parameter", "Test contains a no longer valid work around for the float range bug")

task.replaceValueInMatch("_type", "_doc")
task.addAllowedWarningRegex("\\[types removal\\].*")
task.replaceValueInMatch("nodes.\$node_id.roles.8", "ml", "node_info role test")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ setup:
- index: test-*
- {query: {match: {bool: true} }, size: 0, aggs: {test_filter: {filter: {range: {integer: {gte: 20} } } } } }
- index: test-1
- {query: {match_all: {} }, size: 0, aggs: {test_range: {range: {field: float, ranges: [ {to: 19.2499999}, {from: 19.25} ] } } } }
- {query: {match_all: {} }, size: 0, aggs: {test_range: {range: {field: float, ranges: [ {to: 19.25}, {from: 19.25} ] } } } }
- index: test-*
- {query: {bool: {filter: {range: {row: {lt: 5}}} } }, size: 0, aggs: {test_percentiles: {percentiles: {field: float} } } }
# Testing suggesters
Expand All @@ -78,7 +78,7 @@ setup:
- match: { responses.0.hits.total: 3 }
- match: { responses.0.aggregations.filter#test_filter.doc_count : 2 }
- match: { responses.1.hits.total: 3 }
- match: { responses.1.aggregations.range#test_range.buckets.0.key : "*-19.2499999" }
- match: { responses.1.aggregations.range#test_range.buckets.0.key : "*-19.25" }
- match: { responses.1.aggregations.range#test_range.buckets.0.doc_count : 2 }
- match: { responses.1.aggregations.range#test_range.buckets.1.key : "19.25-*" }
- match: { responses.1.aggregations.range#test_range.buckets.1.doc_count : 1 }
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,10 @@ setup:
type: double
long:
type: long
float:
type: float
half_float:
type: half_float

- do:
cluster.health:
Expand All @@ -22,15 +26,71 @@ setup:
refresh: true
body:
- {"index": {}}
- { "double" : 42.1, "long": 25 }
- { "double" : 42.1, "long": 25, "float": 0.01, "half_float": 0.01 }
- {"index": {}}
- { "double" : 100.7, "long": 80 }
- { "double" : 100.7, "long": 80, "float": 0.03, "half_float": 0.0152 }
- {"index": {}}
- { "double" : 50.5, "long": 75}
- { "double" : 50.5, "long": 75, "float": 0.04, "half_float": 0.04 }
# For testing missing values
- {"index": {}}
- {}

---
"Float Endpoint Exclusive":
- skip:
version: " - 7.99.99"
reason: Bug fixed in 7.16.0 (backport pending)
- do:
search:
body:
size: 0
aggs:
double_range:
range:
format: "0.0#"
field: "float"
ranges:
-
from: 0
to: 0.04
- from: 0.04
to: 1.0
- match: { hits.total.relation: "eq" }
- match: { hits.total.value: 4 }
- length: { aggregations.double_range.buckets: 2 }
- match: { aggregations.double_range.buckets.0.key: "0.0-0.04" }
- match: { aggregations.double_range.buckets.0.doc_count: 2 }
- match: { aggregations.double_range.buckets.1.key: "0.04-1.0" }
- match: { aggregations.double_range.buckets.1.doc_count: 1 }

---
"Half Float Endpoint Exclusive":
- skip:
version: " - 7.99.99"
reason: Bug fixed in 7.16.0 (backport pending)
- do:
search:
body:
size: 0
aggs:
double_range:
range:
format: "0.0###"
field: "half_float"
ranges:
-
from: 0
to: 0.0152
- from: 0.0152
to: 1.0
- match: { hits.total.relation: "eq" }
- match: { hits.total.value: 4 }
- length: { aggregations.double_range.buckets: 2 }
- match: { aggregations.double_range.buckets.0.key: "0.0-0.0152" }
- match: { aggregations.double_range.buckets.0.doc_count: 1 }
- match: { aggregations.double_range.buckets.1.key: "0.0152-1.0" }
- match: { aggregations.double_range.buckets.1.doc_count: 2 }

---
"Double range":
- do:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1040,6 +1040,21 @@ public String typeName() {
return type.name;
}

/**
 * Re-parses {@code value} at the precision of the backing number type and widens
 * the result back to a double. Mapping a double to a double looks like a no-op,
 * but narrower field types (e.g. float, half_float) approximate unrepresentable
 * decimals differently than a parsed double does — for instance 0.04F &lt; 0.04D —
 * which would otherwise place values in the wrong range-aggregation bucket.
 */
public double reduceToStoredPrecision(double value) {
    // Integer-backed types throw when asked to parse an infinity, so infinite
    // inputs are passed through unchanged.
    return Double.isInfinite(value) ? value : type.parse(value, false).doubleValue();
}

public NumericType numericType() {
return type.numericType();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import java.io.IOException;
import java.util.List;
import java.util.Map;
import java.util.function.DoubleUnaryOperator;

public class RangeAggregationBuilder extends AbstractRangeBuilder<RangeAggregationBuilder, Range> {
public static final String NAME = "range";
Expand Down Expand Up @@ -152,12 +153,18 @@ protected RangeAggregatorFactory innerBuild(
) throws IOException {
RangeAggregatorSupplier aggregatorSupplier = context.getValuesSourceRegistry().getAggregator(REGISTRY_KEY, config);

/*
This will downgrade the precision of the range bounds to match the field's precision. Fixes float/double issues, but not
long/double issues. See https://github.com/elastic/elasticsearch/issues/77033
*/
DoubleUnaryOperator fixPrecision = config.reduceToStoredPrecisionFunction();

// We need to call processRanges here so they are parsed before we make the decision of whether to cache the request
Range[] ranges = processRanges(range -> {
DocValueFormat parser = config.format();
assert parser != null;
Double from = range.from;
Double to = range.to;
Double from = fixPrecision.applyAsDouble(range.from);
Double to = fixPrecision.applyAsDouble(range.to);
if (range.fromAsStr != null) {
from = parser.parseDouble(range.fromAsStr, false, context::nowInMillis);
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@
import org.elasticsearch.index.fielddata.IndexGeoPointFieldData;
import org.elasticsearch.index.fielddata.IndexNumericFieldData;
import org.elasticsearch.index.mapper.MappedFieldType;
import org.elasticsearch.index.mapper.NumberFieldMapper;
import org.elasticsearch.index.mapper.RangeFieldMapper;
import org.elasticsearch.script.AggregationScript;
import org.elasticsearch.script.Script;
import org.elasticsearch.search.DocValueFormat;

import java.io.IOException;
import java.time.ZoneId;
import java.util.function.DoubleUnaryOperator;
import java.util.function.Function;

/**
Expand Down Expand Up @@ -321,6 +323,17 @@ public FieldContext fieldContext() {
return fieldContext;
}

/**
 * Returns an operator that maps a double to the value it would have had if it
 * had been parsed by this source's field mapper and then widened back to a
 * double. Used to correct precision mismatches between range bounds (always
 * doubles on the wire) and narrower numeric field types. Falls back to the
 * identity function when no mapped number field is available.
 */
public DoubleUnaryOperator reduceToStoredPrecisionFunction() {
    MappedFieldType mappedType = fieldType();
    if (fieldContext() == null || (mappedType instanceof NumberFieldMapper.NumberFieldType) == false) {
        // Unmapped field, script, or non-number mapper: nothing to scale.
        return DoubleUnaryOperator.identity();
    }
    return ((NumberFieldMapper.NumberFieldType) mappedType)::reduceToStoredPrecision;
}

/**
* Convenience method for looking up the mapped field type backing this values source, if it exists.
*/
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,11 @@
import org.apache.lucene.document.NumericDocValuesField;
import org.apache.lucene.document.SortedNumericDocValuesField;
import org.apache.lucene.document.SortedSetDocValuesField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.RandomIndexWriter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.MatchAllDocsQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.elasticsearch.core.CheckedConsumer;
import org.elasticsearch.index.mapper.DateFieldMapper;
import org.elasticsearch.index.mapper.DateFieldMapper.Resolution;
Expand Down Expand Up @@ -103,6 +100,69 @@ public void testMatchesNumericDocValues() throws IOException {
});
}

/**
 * A doc holding exactly the shared endpoint of two adjacent ranges on a double
 * field must follow the half-open interval rule: excluded from the bucket whose
 * {@code to} it equals, included in the bucket whose {@code from} it equals.
 */
public void testDoubleRangesExclusiveEndpoint() throws IOException {
    final String fieldName = "double";
    final MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(fieldName, NumberType.DOUBLE);
    final RangeAggregationBuilder builder = new RangeAggregationBuilder("range").field(fieldName)
        .addRange("r1", 0, 0.04D)
        .addRange("r2", 0.04D, 1.0D);
    testCase(builder, new MatchAllDocsQuery(), iw -> {
        // Single doc exactly on the boundary value 0.04
        iw.addDocument(List.of(new SortedNumericDocValuesField(fieldName, NumericUtils.doubleToSortableLong(0.04D))));
    }, result -> {
        InternalRange<?, ?> agg = (InternalRange<?, ?>) result;
        List<? extends InternalRange.Bucket> buckets = agg.getBuckets();
        assertEquals(2, buckets.size());
        // Boundary doc is excluded from [0, 0.04) ...
        assertEquals(0, buckets.get(0).getDocCount());
        // ... and included in [0.04, 1.0)
        assertEquals(1, buckets.get(1).getDocCount());
    }, fieldType);
}

/**
 * 0.04 is not exactly representable in binary; the float approximation differs
 * from the double approximation. Verifies that a float-field doc storing 0.04F
 * still obeys the half-open interval rule when the range bounds arrive as
 * doubles (i.e. the bounds are scaled to float precision first).
 */
public void testFloatRangesExclusiveEndpoint() throws IOException {
    final String fieldName = "float";
    final MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(fieldName, NumberType.FLOAT);
    final RangeAggregationBuilder builder = new RangeAggregationBuilder("range").field(fieldName)
        .addRange("r1", 0, 0.04D)
        .addRange("r2", 0.04D, 1.0D);
    testCase(builder, new MatchAllDocsQuery(), iw -> {
        // Single doc exactly on the (float) boundary value
        iw.addDocument(NumberType.FLOAT.createFields(fieldName, 0.04F, false, true, false));
    }, result -> {
        InternalRange<?, ?> agg = (InternalRange<?, ?>) result;
        List<? extends InternalRange.Bucket> buckets = agg.getBuckets();
        assertEquals(2, buckets.size());
        // Excluded from the bucket ending at 0.04 ...
        assertEquals(0, buckets.get(0).getDocCount());
        // ... included in the bucket starting at 0.04
        assertEquals(1, buckets.get(1).getDocCount());
    }, fieldType);
}

/**
 * Same half-open interval check as the float case, but for half_float, whose
 * 11-bit mantissa gives an even coarser approximation of 0.0152 than either
 * float or double.
 */
public void testHalfFloatRangesExclusiveEndpoint() throws IOException {
    final String fieldName = "halfFloat";
    final MappedFieldType fieldType = new NumberFieldMapper.NumberFieldType(fieldName, NumberType.HALF_FLOAT);
    final RangeAggregationBuilder builder = new RangeAggregationBuilder("range").field(fieldName)
        .addRange("r1", 0, 0.0152D)
        .addRange("r2", 0.0152D, 1.0D);
    testCase(builder, new MatchAllDocsQuery(), iw -> {
        // Single doc exactly on the (half_float) boundary value
        iw.addDocument(NumberType.HALF_FLOAT.createFields(fieldName, 0.0152F, false, true, false));
    }, result -> {
        InternalRange<?, ?> agg = (InternalRange<?, ?>) result;
        List<? extends InternalRange.Bucket> buckets = agg.getBuckets();
        assertEquals(2, buckets.size());
        // Excluded from the bucket ending at 0.0152 ...
        assertEquals(0, buckets.get(0).getDocCount());
        // ... included in the bucket starting at 0.0152
        assertEquals(1, buckets.get(1).getDocCount());
    }, fieldType);
}

public void testUnboundedRanges() throws IOException {
testCase(
new RangeAggregationBuilder("name").field(NUMBER_FIELD_NAME).addUnboundedTo(5).addUnboundedFrom(5),
Expand Down Expand Up @@ -174,7 +234,7 @@ public void testDateFieldMillisecondResolution() throws IOException {
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(List.of(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli1), new LongPoint(DATE_FIELD_NAME, milli1)));
iw.addDocument(List.of(new SortedNumericDocValuesField(DATE_FIELD_NAME, milli2), new LongPoint(DATE_FIELD_NAME, milli2)));
}, range -> {
}, (Consumer<InternalRange<?, ?>>) range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(1, ranges.size());
assertEquals(1, ranges.get(0).getDocCount());
Expand Down Expand Up @@ -205,7 +265,7 @@ public void testDateFieldNanosecondResolution() throws IOException {
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, TimeUnit.MILLISECONDS.toNanos(milli1))));
iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, TimeUnit.MILLISECONDS.toNanos(milli2))));
}, range -> {
}, (Consumer<InternalRange<?, ?>>) range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(1, ranges.size());
assertEquals(1, ranges.get(0).getDocCount());
Expand Down Expand Up @@ -239,7 +299,7 @@ public void testMissingDateWithDateNanosField() throws IOException {
iw.addDocument(singleton(new SortedNumericDocValuesField(DATE_FIELD_NAME, TimeUnit.MILLISECONDS.toNanos(milli2))));
// Missing will apply to this document
iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 7)));
}, range -> {
}, (Consumer<InternalRange<?, ?>>) range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(1, ranges.size());
assertEquals(2, ranges.get(0).getDocCount());
Expand Down Expand Up @@ -273,7 +333,7 @@ public void testNotFitIntoDouble() throws IOException {
for (long l = start; l < start + 150; l++) {
iw.addDocument(List.of(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, l), new LongPoint(NUMBER_FIELD_NAME, l)));
}
}, range -> {
}, (Consumer<InternalRange<?, ?>>) range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertThat(ranges, hasSize(3));
// If we had a native `double` range aggregator we'd get 50, 50, 50
Expand Down Expand Up @@ -305,7 +365,7 @@ public void testUnmappedWithMissingNumber() throws IOException {
testCase(aggregationBuilder, new MatchAllDocsQuery(), iw -> {
iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 7)));
iw.addDocument(singleton(new NumericDocValuesField(NUMBER_FIELD_NAME, 1)));
}, range -> {
}, (Consumer<InternalRange<?, ?>>) range -> {
List<? extends InternalRange.Bucket> ranges = range.getBuckets();
assertEquals(1, ranges.size());
assertEquals(2, ranges.get(0).getDocCount());
Expand Down Expand Up @@ -542,31 +602,4 @@ private void simpleTestCase(
iw.addDocument(singleton(new SortedNumericDocValuesField(NUMBER_FIELD_NAME, 3)));
}, verify, fieldType);
}

/**
 * Builds an in-memory Lucene index via {@code buildIndex}, runs {@code query}
 * with the given range aggregation over it, and hands the reduced result to
 * {@code verify}.
 */
private void testCase(
    RangeAggregationBuilder aggregationBuilder,
    Query query,
    CheckedConsumer<RandomIndexWriter, IOException> buildIndex,
    Consumer<InternalRange<? extends InternalRange.Bucket, ? extends InternalRange<?, ?>>> verify,
    MappedFieldType fieldType
) throws IOException {
    try (Directory dir = newDirectory()) {
        RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
        buildIndex.accept(writer);
        writer.close();

        try (IndexReader reader = DirectoryReader.open(dir)) {
            IndexSearcher searcher = newSearcher(reader, true, true);
            InternalRange<? extends InternalRange.Bucket, ? extends InternalRange<?, ?>> result = searchAndReduce(
                searcher,
                query,
                aggregationBuilder,
                fieldType
            );
            verify.accept(result);
        }
    }
}
}

0 comments on commit b968fcb

Please sign in to comment.