diff --git a/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/GcMemoryMetrics.java b/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/GcMemoryMetrics.java
index c56fc7d9d..5f1e2ba7e 100644
--- a/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/GcMemoryMetrics.java
+++ b/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/GcMemoryMetrics.java
@@ -52,12 +52,17 @@ public boolean isAvailable() {
   }

   public void registerListener() {
+    registerListener(null);
+  }
+
+  public void registerListener(GcEventCallback gcEventCallback) {
     ManagementFactory.getMemoryPoolMXBeans().stream()
         .filter(pool -> MemoryType.HEAP.equals(pool.getType()))
         .map(MemoryPoolMXBean::getName)
         .forEach(heapPoolNames::add);

-    GcMetricsNotificationListener gcNotificationListener = new GcMetricsNotificationListener();
+    GcMetricsNotificationListener gcNotificationListener =
+        new GcMetricsNotificationListener(gcEventCallback);
     for (GarbageCollectorMXBean gcBean : ManagementFactory.getGarbageCollectorMXBeans()) {
       if (!(gcBean instanceof NotificationEmitter)) {
         continue;
@@ -86,6 +91,11 @@ public void close() {
   }

   class GcMetricsNotificationListener implements NotificationListener {
+    private final GcEventCallback gcEventCallback;
+
+    GcMetricsNotificationListener(GcEventCallback gcEventCallback) {
+      this.gcEventCallback = gcEventCallback;
+    }

     @Override
     public void handleNotification(Notification notification, Object ref) {
@@ -101,6 +111,10 @@ public void handleNotification(Notification notification, Object ref) {
       long usageAfter = sumMemoryUsage(after);

       deltaSum.addAndGet(usageBefore - usageAfter);
+
+      if (gcEventCallback != null) {
+        gcEventCallback.handleGcEvent(notificationInfo);
+      }
     }

     private long sumMemoryUsage(Map memoryUsageMap) {
@@ -136,4 +150,8 @@ private static boolean isManagementExtensionsPresent() {
       return false;
     }
   }
+
+  public interface GcEventCallback {
+    void handleGcEvent(GarbageCollectionNotificationInfo notificationInfo);
+  }
 }
diff --git a/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/JvmMetricsInstaller.java b/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/JvmMetricsInstaller.java
index e36a3bd4d..4bfc58f32 100644
--- a/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/JvmMetricsInstaller.java
+++ b/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/JvmMetricsInstaller.java
@@ -67,6 +67,7 @@ public void afterAgent(AutoConfiguredOpenTelemetrySdk autoConfiguredOpenTelemetr
     // process.runtime.jvm.memory.limit{pool=}
     // runtime.jvm.gc.live.data.size is replaced by OTel
     // process.runtime.jvm.memory.usage_after_last_gc{pool=}
+    // (temporarily restored to ease migration)
     // runtime.jvm.gc.memory.allocated is replaced by memory profiling metric
     // process.runtime.jvm.memory.allocated
     // runtime.jvm.gc.memory.promoted is removed with no direct replacement
@@ -75,7 +76,7 @@ public void afterAgent(AutoConfiguredOpenTelemetrySdk autoConfiguredOpenTelemetr
     // runtime.jvm.memory.usage.after.gc is replaced by OTel
     // process.runtime.jvm.memory.usage_after_last_gc{pool=,type=heap} /
     // process.runtime.jvm.memory.limit{pool=,type=heap}
-    // runtime.jvm.gc.overhead is something that should to done in a dashboard, not here
+    // runtime.jvm.gc.overhead is something that should be done in a dashboard, not here

     new OtelJvmThreadMetrics().install();
   }
diff --git a/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/otel/OtelGcMemoryMetrics.java b/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/otel/OtelGcMemoryMetrics.java
index 7b7fbe4ae..467deade9 100644
--- a/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/otel/OtelGcMemoryMetrics.java
+++ b/instrumentation/jvm-metrics/src/main/java/com/splunk/opentelemetry/instrumentation/jvmmetrics/otel/OtelGcMemoryMetrics.java
@@ -14,15 +14,57 @@
  * limitations under the License.
  */

+// Includes work from:
+/*
+ * Copyright 2019 VMware, Inc.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
 package com.splunk.opentelemetry.instrumentation.jvmmetrics.otel;

 import static com.splunk.opentelemetry.instrumentation.jvmmetrics.GcMemoryMetrics.METRIC_NAME;
+import static io.opentelemetry.api.common.AttributeKey.stringKey;

 import com.splunk.opentelemetry.instrumentation.jvmmetrics.GcMemoryMetrics;
+import com.sun.management.GcInfo;
+import io.opentelemetry.api.common.Attributes;
+import io.opentelemetry.api.metrics.LongCounter;
 import io.opentelemetry.api.metrics.Meter;
+import java.lang.management.ManagementFactory;
+import java.lang.management.MemoryPoolMXBean;
+import java.lang.management.MemoryUsage;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.Set;
+import java.util.concurrent.atomic.AtomicLong;

 public class OtelGcMemoryMetrics {

+  private final boolean isGenerationalGc = isGenerationalGcConfigured();
+  private final Set<String> longLivedPoolNames = new HashSet<>();
+  private AtomicLong liveDataSize;
+
+  public OtelGcMemoryMetrics() {
+    for (MemoryPoolMXBean mbean : ManagementFactory.getMemoryPoolMXBeans()) {
+      String name = mbean.getName();
+      if (isLongLivedPool(name)) {
+        longLivedPoolNames.add(name);
+      }
+    }
+  }
+
   public void install() {
     GcMemoryMetrics gcMemoryMetrics = new GcMemoryMetrics();
     if (!gcMemoryMetrics.isAvailable()) {
@@ -36,6 +78,139 @@ public void install() {
         .setDescription("Sum of heap size differences before and after gc.")
         .buildWithCallback(measurement -> measurement.record(gcMemoryMetrics.getDeltaSum()));

-    gcMemoryMetrics.registerListener();
+    LongCounter gcPauseCounter =
+        meter
+            .counterBuilder("runtime.jvm.gc.pause.count")
+            .setUnit("{gcs}")
+            .setDescription("Number of gc pauses.")
+            .build();
+    LongCounter gcPauseTime =
+        meter
+            .counterBuilder("runtime.jvm.gc.pause.totalTime")
+            .setUnit("ms")
+            .setDescription("Time spent in GC pause.")
+            .build();
+
+    liveDataSize = new AtomicLong();
+    meter
+        .gaugeBuilder("runtime.jvm.gc.live.data.size")
+        .setUnit("bytes")
+        .setDescription("Size of long-lived heap memory pool after reclamation.")
+        .buildWithCallback(measurement -> measurement.record(liveDataSize.get()));
+
+    gcMemoryMetrics.registerListener(
+        notificationInfo -> {
+          GcInfo gcInfo = notificationInfo.getGcInfo();
+          String gcName = notificationInfo.getGcName();
+          String gcCause = notificationInfo.getGcCause();
+          String gcAction = notificationInfo.getGcAction();
+          long duration = gcInfo.getDuration();
+          if (!isConcurrentPhase(gcCause, gcName)) {
+            Attributes attributes =
+                Attributes.of(
+                    stringKey("gc"),
+                    gcName,
+                    stringKey("action"),
+                    gcAction,
+                    stringKey("cause"),
+                    gcCause);
+            gcPauseCounter.add(1, attributes);
+            gcPauseTime.add(duration, attributes);
+          }
+
+          final Map<String, MemoryUsage> before = gcInfo.getMemoryUsageBeforeGc();
+          final Map<String, MemoryUsage> after = gcInfo.getMemoryUsageAfterGc();
+
+          long longLivedBefore =
+              longLivedPoolNames.stream().mapToLong(pool -> before.get(pool).getUsed()).sum();
+          long longLivedAfter =
+              longLivedPoolNames.stream().mapToLong(pool -> after.get(pool).getUsed()).sum();
+
+          // Some GC implementations such as G1 can reduce the old gen size as part of a minor GC.
+          // To track the live data size we record the value if we see a reduction in the long-lived
+          // heap size or after a major/non-generational GC.
+          if (longLivedAfter < longLivedBefore
+              || shouldUpdateDataSizeMetrics(notificationInfo.getGcName())) {
+            liveDataSize.set(longLivedAfter);
+          }
+        });
+  }
+
+  private static boolean isConcurrentPhase(String cause, String name) {
+    return "No GC".equals(cause)
+        || "Shenandoah Cycles".equals(name)
+        || "ZGC Cycles".equals(name)
+        || (name.startsWith("GPGC") && !name.endsWith("Pauses"));
+  }
+
+  private static boolean isLongLivedPool(String name) {
+    return name != null
+        && (name.endsWith("Old Gen")
+            || name.endsWith("Tenured Gen")
+            || "Shenandoah".equals(name)
+            || "ZHeap".equals(name)
+            || name.endsWith("balanced-old")
+            || name.contains("tenured") // "tenured",
+            // "tenured-SOA",
+            // "tenured-LOA"
+            || "JavaHeap".equals(name) // metronome
+        );
+  }
+
+  private boolean shouldUpdateDataSizeMetrics(String gcName) {
+    return nonGenerationalGcShouldUpdateDataSize(gcName) || isMajorGenerationalGc(gcName);
+  }
+
+  private boolean isMajorGenerationalGc(String gcName) {
+    return GcGenerationAge.fromGcName(gcName) == GcGenerationAge.OLD;
+  }
+
+  private boolean nonGenerationalGcShouldUpdateDataSize(String gcName) {
+    return !isGenerationalGc
+        // Skip Shenandoah and ZGC gc notifications with the name Pauses due
+        // to missing memory pool size info
+        && !gcName.endsWith("Pauses");
+  }
+
+  private boolean isGenerationalGcConfigured() {
+    return ManagementFactory.getMemoryPoolMXBeans().stream()
+            .filter(JvmMemory::isHeap)
+            .map(MemoryPoolMXBean::getName)
+            .filter(name -> !name.contains("tenured"))
+            .count()
+        > 1;
+  }
+
+  /**
+   * Generalization of which parts of the heap are considered "young" or "old" for multiple GC
+   * implementations
+   */
+  enum GcGenerationAge {
+    OLD,
+    YOUNG,
+    UNKNOWN;
+
+    private static final Map<String, GcGenerationAge> knownCollectors =
+        new HashMap<String, GcGenerationAge>() {
+          {
+            put("ConcurrentMarkSweep", OLD);
+            put("Copy", YOUNG);
+            put("G1 Old Generation", OLD);
+            put("G1 Young Generation", YOUNG);
+            put("MarkSweepCompact", OLD);
+            put("PS MarkSweep", OLD);
+            put("PS Scavenge", YOUNG);
+            put("ParNew", YOUNG);
+            put("global", OLD);
+            put("scavenge", YOUNG);
+            put("partial gc", YOUNG);
+            put("global garbage collect", OLD);
+            put("Epsilon", OLD);
+          }
+        };
+
+    static GcGenerationAge fromGcName(String gcName) {
+      return knownCollectors.getOrDefault(gcName, UNKNOWN);
+    }
   }
 }