Skip to content

Commit

Permalink
Fix LatencyMetrics parsing to take into account that getCount() retur…
Browse files Browse the repository at this point in the history
…ns different reservoir values for 3.11/4.0 and 4.1/5.0. In latter, the reservoir stores in microseconds while in the first case, they are nanoseconds.
  • Loading branch information
burmanm committed Oct 28, 2024
1 parent a72fb25 commit 611b36d
Show file tree
Hide file tree
Showing 4 changed files with 34 additions and 17 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ Changelog for Management API, new PRs should update the `main / unreleased` sect
* [FEATURE] [#549](https://github.com/k8ssandra/management-api-for-apache-cassandra/issues/549) Add DSE 6.9.3 to the build matrix
* [ENHANCEMENT] [#552](https://github.com/k8ssandra/management-api-for-apache-cassandra/issues/552) Improve "liveness" probe implementation
* [BUGFIX] [#553](https://github.com/k8ssandra/management-api-for-apache-cassandra/issues/553) Fix CassandraTaskExports metric filtering to make it work with 5.0.x Major compactions
* [BUGFIX] [#560](https://github.com/k8ssandra/management-api-for-apache-cassandra/issues/560) Fix LatencyMetrics bucketing on 4.1 and 5.0 as their reservoir stores the data in microseconds, not nano (unlike 3.11 and 4.0)

## v0.1.87 (2024-10-02)
* [FEATURE] [#535](https://github.com/k8ssandra/management-api-for-apache-cassandra/issues/535) Add Cassandra 5.0.0 to the build matrix
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
import java.util.function.Supplier;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.apache.cassandra.utils.CassandraVersion;
import org.apache.cassandra.utils.EstimatedHistogram;
import org.apache.cassandra.utils.FBUtilities;
import org.slf4j.LoggerFactory;
Expand All @@ -55,6 +56,8 @@ public class CassandraMetricRegistryListener implements MetricRegistryListener {
private static final String SERVER_VERSION = FBUtilities.getReleaseVersionString();
private static final int SERVER_PATCH_VERSION;

private boolean microLatencyBuckets = false;

static {
Matcher matcher = VERSION_PATTERN.matcher(SERVER_VERSION);
if (matcher.matches()) {
Expand Down Expand Up @@ -83,6 +86,13 @@ public CassandraMetricRegistryListener(
parser = CassandraMetricNameParser.getDefaultParser(config);
cache = new ConcurrentHashMap<>();

CassandraVersion cassandraVersion = new CassandraVersion(FBUtilities.getReleaseVersionString());

if (cassandraVersion.major > 4 || (cassandraVersion.major == 4 && cassandraVersion.minor > 0)) {
// 4.1 and up should use microsecond buckets
microLatencyBuckets = true;
}

this.familyCache = familyCache;
}

Expand Down Expand Up @@ -372,7 +382,6 @@ private void setTimerFiller(
}
buckets = (long[]) bucketOffsetField.get(snapshot);
} catch (NoSuchFieldException | IllegalAccessException e) {
logger.debug("Unable to get bucketOffsets", e);
buckets = CassandraMetricsTools.DECAYING_BUCKETS;
}

Expand Down Expand Up @@ -407,8 +416,9 @@ private void setTimerFiller(
int outputIndex = 0; // output index
long cumulativeCount = 0;
for (int i = 0; i < buckets.length; i++) {
int offsetFix = microLatencyBuckets ? 1 : 1000;
if (outputIndex < LATENCY_OFFSETS.length
&& buckets[i] > (LATENCY_OFFSETS[outputIndex] * 1000)) {
&& buckets[i] > (LATENCY_OFFSETS[outputIndex] * offsetFix)) {
List<String> labelValues = new ArrayList<>(bucket.getLabelValues().size() + 1);
int j = 0;
for (; j < bucket.getLabelValues().size(); j++) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@
import org.apache.cassandra.locator.IEndpointSnitch;
import org.apache.cassandra.service.StorageService;
import org.apache.cassandra.utils.EstimatedHistogram;
import org.apache.cassandra.utils.Pair;

public class CassandraMetricsTools {

Expand Down Expand Up @@ -63,7 +62,6 @@ public class CassandraMetricsTools {
protected static final long[] DECAYING_BUCKETS = new EstimatedHistogram(165).getBucketOffsets();

// Log linear buckets (these must match the collectd entry in types.db)
protected static final Pair<Long, String>[] LATENCY_BUCKETS;
protected static final long[] LATENCY_OFFSETS = {
35, 60, 103, 179, 310, 535, 924, 1597, 2759, 4768, 8239, 14237, 24601, 42510, 73457, 126934,
219342, 379022, 654949, 1131752, 1955666, 3379391, 5839588, 10090808, 17436917
Expand All @@ -77,16 +75,6 @@ public class CassandraMetricsTools {
}
}

static {
LATENCY_BUCKETS = new Pair[LATENCY_OFFSETS.length];
for (int i = 0; i < LATENCY_BUCKETS.length; i++) {
// Latencies are reported in nanoseconds, so we convert the offsets from micros
// to nanos
LATENCY_BUCKETS[i] =
Pair.create(LATENCY_OFFSETS[i] * 1000, "bucket_" + Long.toString(LATENCY_OFFSETS[i]));
}
}

private Map<String, CassandraMetricDefinition> metricDefinitions;

public CassandraMetricsTools() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*/
package io.k8ssandra.metrics.builder;

import static io.k8ssandra.metrics.builder.CassandraMetricsTools.LATENCY_BUCKETS;
import static io.k8ssandra.metrics.builder.CassandraMetricsTools.LATENCY_OFFSETS;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertTrue;

Expand All @@ -22,6 +22,7 @@
import java.util.concurrent.TimeUnit;
import org.apache.cassandra.metrics.CassandraMetricsRegistry;
import org.apache.cassandra.metrics.DefaultNameFactory;
import org.apache.cassandra.metrics.LatencyMetrics;
import org.junit.Test;

public class MetricsRegistryTest {
Expand Down Expand Up @@ -125,7 +126,7 @@ public void timerTest() throws Exception {

List<Collector.MetricFamilySamples> collect = exporter.collect();

HashMap<String, Double> bucketMap = new HashMap<>(LATENCY_BUCKETS.length + 1);
HashMap<String, Double> bucketMap = new HashMap<>(LATENCY_OFFSETS.length + 1);
Double countValue = -1.0D;
Double sumValue = -1.0D;
for (Collector.MetricFamilySamples mfs : collect) {
Expand All @@ -140,14 +141,31 @@ public void timerTest() throws Exception {
}
}

assertEquals(LATENCY_BUCKETS.length + 1, bucketMap.keySet().size());
assertEquals(LATENCY_OFFSETS.length + 1, bucketMap.keySet().size());
assertEquals(countValue, bucketMap.get("+Inf"));
assertTrue(countValue > 0);
assertTrue(sumValue > 0);

registry.remove(createMetricName("test_timer"));
}

@Test
public void timerBucketTest() throws Exception {
CassandraMetricsRegistry registry = CassandraMetricsRegistry.Metrics;
Configuration config = new Configuration();
CassandraDropwizardExports exporter = new CassandraDropwizardExports(registry, config);

LatencyMetrics latencyMetrics = new LatencyMetrics("ClientRequest", "TimerTest");
latencyMetrics.addNano(TimeUnit.NANOSECONDS.convert(1, TimeUnit.MILLISECONDS));
latencyMetrics.addNano(TimeUnit.NANOSECONDS.convert(3, TimeUnit.MILLISECONDS));

List<Collector.MetricFamilySamples> collect = exporter.collect();
assertEquals(4000.0, collect.get(0).samples.get(0).value, 0.01);
assertEquals(1.0, collect.get(1).samples.get(7).value, 0.01);
assertEquals(2.0, collect.get(1).samples.get(9).value, 0.01);
assertEquals(2.0, collect.get(1).samples.get(collect.get(1).samples.size() - 1).value, 0.01);
}

private CassandraMetricsRegistry.MetricName createMetricName(String name) {
return DefaultNameFactory.createMetricName("test", name, "test");
}
Expand Down

0 comments on commit 611b36d

Please sign in to comment.