Skip to content

Commit

Permalink
Fix rank calculation in Summary::quantile()
Browse files Browse the repository at this point in the history
- Changes the rank calculation in Summary::quantile() to 'q * total'
from 'floor(q * (total - 1))'. The 'floor' in the previous calculation
was implicit due to the cast to u64.

- Changes the condition for mapping to the 'negative', 'positive', or
'zeroes' sketch to be '<=' instead of '<'.

- Defers rounding the quantile to the nearest rank to the underlying
sketch. This change is not necessary for correctness, but could be
desireable. For example, deferring this operation could allow the
underlying sketch to perform interpolation.

- Fixes incorrect assertions in test_basics(), which pass with the
changes to the rank calculation and quantile mapping.

The previous method could incorrectly map a request for a value at a
particular rank to the adjacent rank in some cases. Consider a
distribution with the values {0.0, 1.0}. In this case, any request for a
quantile < 1.0 would map to the 'zeroes' sketch. for example, the rank
for q = 0.9 is computed as 'floor(0.9 * (2 - 1)) = floor(0.9) = 0'.
This would map to the 'zeroes' sketch while it should instead map to a
request for q = 0.8 in the positive sketch. The new method would give us
a rank of '0.9 * 2 = 1.8' which correctly maps to 0.8 in the positive
sketch.

The condition for mapping to the appropriate sketch now uses the '<='
operator instead of the '<' operator. Consider a request for q = 0.5,
with the distribution {-1.0, 1.0}. The '<=' operator ensures that this
request is mapped to q = 0.0 in the negative sketch (would be 1.0, but
we flip the distribution) rather than q = 0.0 in the positive sketch.

The mapping for q = 0.0 was handled as a special case in a prior commit
to allow for the updated mapping, as q = 0.0 would otherwise always map
to the negative sketch, whether or not the negative sketch had any
values.

Both of the cases described above are now covered (with a different
distribution) in test_basics().
  • Loading branch information
Dan Wilbanks committed May 27, 2022
1 parent d215b30 commit 5274708
Showing 1 changed file with 48 additions and 17 deletions.
65 changes: 48 additions & 17 deletions metrics-util/src/summary.rs
Original file line number Diff line number Diff line change
Expand Up @@ -133,22 +133,22 @@ impl Summary {
return Some(self.max());
}

let ncount = self.negative.count();
let pcount = self.positive.count();
let zcount = self.zeroes;
let total = ncount + pcount + zcount;
let rank = (q * (total - 1) as f64) as usize;
let ncount = self.negative.count() as f64;
let pcount = self.positive.count() as f64;
let zcount = self.zeroes as f64;
// Defer rounding to the underlying sketch
let rank = q * (ncount + pcount + zcount);

if rank < ncount {
if rank <= ncount {
// Quantile lands in the negative side.
let nq = 1.0 - (rank as f64 / ncount as f64);
let nq = 1.0 - (rank / ncount as f64);
self.negative.quantile(nq).expect("quantile should be valid at this point").map(|v| -v)
} else if rank >= ncount && rank < (ncount + zcount) {
} else if rank <= (ncount + zcount) {
// Quantile lands in the zero band.
Some(0.0)
} else {
// Quantile lands in the positive side.
let pq = (rank - (ncount + zcount)) as f64 / pcount as f64;
let pq = (rank - (ncount + zcount)) / pcount;
self.positive.quantile(pq).expect("quantile should be valid at this point")
}
}
Expand Down Expand Up @@ -206,31 +206,62 @@ mod tests {

#[test]
fn test_basics() {
let mut summary = Summary::with_defaults();
let alpha = 0.0001;
let max_bins = 32_768;
let min_value = 1.0e-9;
let mut summary = Summary::new(alpha, max_bins, min_value);
assert!(summary.is_empty());

// Stretch the legs with a single value.
summary.add(-420.42);
assert_eq!(summary.count(), 1);
assert_relative_eq!(summary.min(), -420.42);
assert_relative_eq!(summary.max(), -420.42);
assert_abs_diff_eq!(summary.quantile(0.1).expect("value should exist"), -420.42);
assert_abs_diff_eq!(summary.quantile(0.5).expect("value should exist"), -420.42);
assert_abs_diff_eq!(summary.quantile(0.99).expect("value should exist"), -420.42);
let alpha = 0.001;

let test_cases = vec![(0.1, -420.42), (0.5, -420.42), (0.9, -420.42)];
for (q, val) in test_cases {
assert_relative_eq!(
summary.quantile(q).expect("value should exist"),
val,
max_relative = 2.0 * alpha * val
);
}

summary.add(420.42);
assert_eq!(summary.count(), 2);
assert_relative_eq!(summary.min(), -420.42);
assert_relative_eq!(summary.max(), 420.42);
assert_abs_diff_eq!(summary.quantile(0.49).expect("value should exist"), -420.42);
assert_relative_eq!(
summary.quantile(0.5).expect("value should exist"),
-420.42,
max_relative = 2.0 * alpha * 420.42
);
assert_relative_eq!(
summary.quantile(0.51).expect("value should exist"),
420.42,
max_relative = 2.0 * alpha * 420.42
);

summary.add(42.42);
assert_eq!(summary.count(), 3);
assert_relative_eq!(summary.min(), -420.42);
assert_relative_eq!(summary.max(), 420.42);
assert_abs_diff_eq!(summary.quantile(0.4999999999).expect("value should exist"), -420.42);
assert_abs_diff_eq!(summary.quantile(0.5).expect("value should exist"), 42.42);
assert_abs_diff_eq!(summary.quantile(0.9999999999).expect("value should exist"), 42.42);

let test_cases = vec![
(0.333333, -420.42),
(0.333334, 42.42),
(0.666666, 42.42),
(0.666667, 420.42),
(0.999999, 420.42),
];
for (q, val) in test_cases {
assert_relative_eq!(
summary.quantile(q).expect("value should exist"),
val,
max_relative = 2.0 * alpha * val
);
}
}

#[test]
Expand Down

0 comments on commit 5274708

Please sign in to comment.