Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[OTEL-2348] Improve DDSketch to Sketch conversion #468

Open
wants to merge 2 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions .chloggen/improve-sketch-conversion.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# One of 'breaking', 'deprecation', 'new_component', 'enhancement', 'bug_fix'
change_type: bug_fix

# The name of the component (e.g. pkg/quantile)
component: pkg/quantile

# A brief description of the change. Surround your text with quotes ("") if it needs to start with a backtick (`).
note: Improve stability of OTel histogram conversion

# The PR related to this change
issues: [468]

# (Optional) One or more lines of additional information to render under the primary note.
# These lines will be padded with 2 spaces and then inserted directly into the document.
# Use pipe (|) for multiline entries.
subtext:
2 changes: 1 addition & 1 deletion pkg/otlp/metrics/sketches_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -506,7 +506,7 @@ func TestKnownDistributionsQuantile(t *testing.T) {
startTime := timeNow.Add(-10 * time.Second)
name := "example.histo"
const (
N uint64 = 1_000
N uint64 = 2_000
M uint64 = 100
)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,15 +33,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,15 +31,15 @@
1344
],
"Counts": [
5,
6,
4,
1,
10,
0,
2,
10,
1,
1,
2,
3,
4
3
]
}
],
Expand Down
40 changes: 10 additions & 30 deletions pkg/quantile/ddsketch.go
Original file line number Diff line number Diff line change
Expand Up @@ -51,39 +51,24 @@ type floatKeyCount struct {

// convertFloatCountsToIntCounts converts a list of float counts to integer counts,
// preserving the total count of the list by tracking leftover decimal counts.
// TODO: this tends to shift sketches towards the right (since the leftover counts
// get added to the rightmost bucket). This could be improved by adding leftover
// counts to the key that's the weighted average of keys that contribute to that leftover count.
func convertFloatCountsToIntCounts(floatKeyCounts []floatKeyCount) []KeyCount {
func convertFloatCountsToIntCounts(floatKeyCounts []floatKeyCount) ([]KeyCount, uint) {
keyCounts := make([]KeyCount, 0, len(floatKeyCounts))

sort.Slice(floatKeyCounts, func(i, j int) bool {
return floatKeyCounts[i].k < floatKeyCounts[j].k
})

leftoverCount := 0.0
floatTotal := 0.0
intTotal := uint(0)
for _, fkc := range floatKeyCounts {
key := fkc.k
count := fkc.c

// Add leftovers from previous bucket, and compute leftovers
// for the next bucket
count += leftoverCount
uintCount := uint(count)
leftoverCount = count - float64(uintCount)

keyCounts = append(keyCounts, KeyCount{k: Key(key), n: uintCount})
floatTotal += fkc.c
rounded := uint(math.Round(floatTotal)) - intTotal
intTotal += rounded
// At this point, intTotal == Round(floatTotal)
keyCounts = append(keyCounts, KeyCount{k: Key(fkc.k), n: rounded})
}

// Edge case where there may be some leftover count because the total count
// isn't an int (or due to float64 precision errors). In this case, round to
// nearest.
if leftoverCount >= 0.5 {
lastIndex := len(keyCounts) - 1
keyCounts[lastIndex] = KeyCount{k: keyCounts[lastIndex].k, n: keyCounts[lastIndex].n + 1}
}

return keyCounts
return keyCounts, intTotal
}

// convertDDSketchIntoSketch takes a DDSketch and moves its data to a Sketch.
Expand Down Expand Up @@ -155,19 +140,14 @@ func convertDDSketchIntoSketch(c *Config, inputSketch *ddsketch.DDSketch) (*Sket
floatKeyCounts = append(floatKeyCounts, floatKeyCount{k: 0, c: zeroes})

// Generate the integer KeyCount objects from the counts we retrieved
keyCounts := convertFloatCountsToIntCounts(floatKeyCounts)
keyCounts, cnt := convertFloatCountsToIntCounts(floatKeyCounts)

// Populate sparseStore object with the collected keyCounts
// insertCounts will take care of creating multiple uint16 bins for a
// single key if the count overflows uint16
sparseStore.insertCounts(c, keyCounts)

// Create summary object
// Calculate the total count that was inserted in the Sketch
var cnt uint
for _, v := range keyCounts {
cnt += v.n
}
sum := inputSketch.GetSum()
avg := sum / float64(cnt)
max, err := inputSketch.GetMaxValue()
Expand Down
42 changes: 23 additions & 19 deletions pkg/quantile/ddsketch_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,7 @@ func TestCreateDDSketchWithSketchMapping(t *testing.T) {
quantile: sketchtest.UQuadraticQ(-N, 0),
},
{
name: "U-quadratic distribution (a=-N,b=N)",
name: "U-quadratic distribution (a=-N/2,b=N/2)",
quantile: sketchtest.UQuadraticQ(-N/2, N/2),
},
{
Expand Down Expand Up @@ -196,13 +196,9 @@ func TestConvertDDSketchIntoSketch(t *testing.T) {
name: "Uniform distribution (a=0,b=N)",
quantile: sketchtest.UniformQ(0, N),
},
// The p99 for this test fails, likely due to the shift of leftover bucket counts the right that is performed
// during the DDSketch -> Sketch conversion, causing the p99 of the output sketch to fall on 0
// (which means the InEpsilon check returns 1).
{
name: "Uniform distribution (a=-N,b=0)",
quantile: sketchtest.UniformQ(-N, 0),
excludedQuantiles: map[int]bool{99: true},
name: "Uniform distribution (a=-N,b=0)",
quantile: sketchtest.UniformQ(-N, 0),
},
{
name: "Uniform distribution (a=-N,b=N)",
Expand All @@ -213,18 +209,16 @@ func TestConvertDDSketchIntoSketch(t *testing.T) {
quantile: sketchtest.UQuadraticQ(0, N),
},
{
name: "U-quadratic distribution (a=-N,b=N)",
name: "U-quadratic distribution (a=-N/2,b=N/2)",
quantile: sketchtest.UQuadraticQ(-N/2, N/2),
},
{
name: "U-quadratic distribution (a=-N,b=N)",
name: "U-quadratic distribution (a=-N,b=0)",
quantile: sketchtest.UQuadraticQ(-N, 0),
},
// Same as above, p99 fails.
{
name: "Truncated Exponential distribution (a=0,b=N,lambda=1/100)",
quantile: sketchtest.TruncateQ(0, N, sketchtest.ExponentialQ(1.0/100), sketchtest.ExponentialCDF(1.0/100)),
excludedQuantiles: map[int]bool{99: true},
name: "Truncated Exponential distribution (a=0,b=N,lambda=1/100)",
quantile: sketchtest.TruncateQ(0, N, sketchtest.ExponentialQ(1.0/100), sketchtest.ExponentialCDF(1.0/100)),
},
{
name: "Truncated Normal distribution (a=0,b=8,mu=0, sigma=1e-3)",
Expand Down Expand Up @@ -280,12 +274,22 @@ func TestConvertDDSketchIntoSketch(t *testing.T) {
outputSketch, err := convertDDSketchIntoSketch(sketchConfig, convertedSketch)
require.NoError(t, err)

// Conversion accuracy formula taken from:
// https://github.com/DataDog/logs-backend/blob/895e56c9eefa1c28a3affbdd0027f58a4c6f4322/domains/event-store/libs/event-store-aggregate/src/test/java/com/dd/event/store/api/query/sketch/SketchTest.java#L409-L422
inputGamma := (1.0 + convertedSketch.RelativeAccuracy()) / (1.0 - convertedSketch.RelativeAccuracy())
outputGamma := sketchConfig.gamma.v
conversionGamma := inputGamma * outputGamma * outputGamma
conversionRelativeAccuracy := (conversionGamma - 1) / (conversionGamma + 1)
/* We compute the expected bound on the relative error between percentile values before
* and after the conversion.
*
* - The +0.5 in `createDDSketchWithSketchMapping` compensates for a bug in
* `sketchConfig.key` which is not relevant here, creating a systematic bias of half
* a bin.
* - The error on accumulated bin counts caused by the rounding process is bounded by 0.5
* by design. In most circumstances, including the scenarios under test, this can
* shift quantiles by one bin at most.
* - Finally, a bug in the interpolation in `Sketch.Quantile` means the output can be
* off by another half a bin.
* (Unfortunately, other tests, as well as backend code, rely on these bugs, so they are
* not easily fixed.)
* In total, the expected worst case relative error:
*/
conversionRelativeAccuracy := sketchConfig.gamma.v*sketchConfig.gamma.v - 1

// Check the count of the output sketch
assert.InDelta(
Expand Down
Loading