From b1bfb133d40b6f2ff47c2abb9f9e7168f6686014 Mon Sep 17 00:00:00 2001 From: Douglas Q Hawkins Date: Fri, 29 Nov 2024 05:21:44 -0500 Subject: [PATCH] feat(smoke-tests): Adding crash tracking (#7855) * Excluding J9 * Improving PortUtils to exitCode when process ends abnormally * Improving error reporting when process fails to start Wrapped PortUtils.waitForPortToOpen with exception handling, when exception is raised log file indicating error is added to exception message * Suppressed FileNotException in cleanUpSpec, when process exits abormally since no file is expected and it creates noise * Clarifying exception message * Refining exception suppression, some tests clean-up with test processes still running? * Adding ability to selectively suppress crash tracking * Adding some sanity checks that test processes are up and running as expected * Reworking exception handling logic around verifyLog * Modifying exception handling logic around verifyLog in response to review feedback * When process is alive or exited normally, the exception flows through as is * When process exited abnormally, the exception is wrapped in another exception that indicates the abnormal termination --- .../trace/agent/test/utils/PortUtils.java | 12 ++++++++- .../smoketest/AbstractServerSmokeTest.groovy | 25 +++++++++++++++++-- .../smoketest/AbstractSmokeTest.groovy | 18 ++++++++++++- .../datadog/smoketest/ProcessManager.groovy | 12 +++++++++ 4 files changed, 63 insertions(+), 4 deletions(-) diff --git a/dd-java-agent/testing/src/main/groovy/datadog/trace/agent/test/utils/PortUtils.java b/dd-java-agent/testing/src/main/groovy/datadog/trace/agent/test/utils/PortUtils.java index 3f57d25666a..3ec28c72cd5 100644 --- a/dd-java-agent/testing/src/main/groovy/datadog/trace/agent/test/utils/PortUtils.java +++ b/dd-java-agent/testing/src/main/groovy/datadog/trace/agent/test/utils/PortUtils.java @@ -115,7 +115,17 @@ public static void waitForPortToOpen( } if (!process.isAlive()) { - throw new RuntimeException("Process died before port " + port + " was opened"); + int exitCode = process.exitValue(); + if (exitCode != 0) { + throw new RuntimeException( + "Process exited abnormally exitCode=" + + exitCode + + " before port=" + + port + + " was opened"); + } else { + throw new RuntimeException("Process finished before port=" + port + " was opened"); + } } if (isPortOpen(port)) { diff --git a/dd-smoke-tests/src/main/groovy/datadog/smoketest/AbstractServerSmokeTest.groovy b/dd-smoke-tests/src/main/groovy/datadog/smoketest/AbstractServerSmokeTest.groovy index a71b32de9cb..24f995bfa86 100644 --- a/dd-smoke-tests/src/main/groovy/datadog/smoketest/AbstractServerSmokeTest.groovy +++ b/dd-smoke-tests/src/main/groovy/datadog/smoketest/AbstractServerSmokeTest.groovy @@ -67,7 +67,12 @@ abstract class AbstractServerSmokeTest extends AbstractSmokeTest { (0.. def port = httpPorts[idx] def process = testedProcesses[idx] - PortUtils.waitForPortToOpen(port, 240, TimeUnit.SECONDS, process) + + try { + PortUtils.waitForPortToOpen(port, 240, TimeUnit.SECONDS, process) + } catch ( Exception e ) { + throw new RuntimeException(e.getMessage() + " - log file " + logFilePaths[idx], e) + } } } @@ -77,7 +82,23 @@ abstract class AbstractServerSmokeTest extends AbstractSmokeTest { if (null != (outputFile = outputs[idx])) { // check the structures written out to the log, // and fail the run if anything unexpected was recorded - verifyLog(idx, outputFile) + try { + verifyLog(idx, outputFile) + } catch (FileNotFoundException e) { + if (testedProcesses[idx].isAlive()) { + throw e + } + def exitCode = testedProcesses[idx].exitValue() + if (exitCode == 0) { + throw e + } else { + def logFile = logFilePaths[idx] + // highlight when process exited abnormally, since that may have contributed + // to the log verification failure + throw new RuntimeException( + "Server process exited abnormally - exit code: ${exitCode}; check log file: ${logFile}", e) + } + } } } } diff --git a/dd-smoke-tests/src/main/groovy/datadog/smoketest/AbstractSmokeTest.groovy b/dd-smoke-tests/src/main/groovy/datadog/smoketest/AbstractSmokeTest.groovy index 092c5b84fb9..423ab6de925 100644 --- a/dd-smoke-tests/src/main/groovy/datadog/smoketest/AbstractSmokeTest.groovy +++ b/dd-smoke-tests/src/main/groovy/datadog/smoketest/AbstractSmokeTest.groovy @@ -164,13 +164,18 @@ abstract class AbstractSmokeTest extends ProcessManager { "-Ddd.version=${VERSION}" ] + def testCrashTracking() { + return true + } def javaProperties() { + def tmpDir = "/tmp" + def ret = [ "${getMaxMemoryArgumentForFork()}", "${getMinMemoryArgumentForFork()}", "-javaagent:${shadowJarPath}", - isIBM ? "-Xdump:directory=/tmp" : "-XX:ErrorFile=/tmp/hs_err_pid%p.log", + isIBM ? "-Xdump:directory=${tmpDir}" : "-XX:ErrorFile=${tmpDir}/hs_err_pid%p.log", "-Ddd.trace.agent.port=${server.address.port}", "-Ddd.env=${ENV}", "-Ddd.version=${VERSION}", @@ -190,9 +195,20 @@ abstract class AbstractSmokeTest extends ProcessManager { if (testTelemetry()) { ret += "-Ddd.telemetry.heartbeat.interval=5" } + // DQH - Nov 2024 - skipping for J9 which doesn't have full crash tracking support + if (testCrashTracking() && !Platform.isJ9()) { + def extension = getScriptExtension() + ret += "-XX:OnError=\"${tmpDir}/dd_crash_uploader.${extension} %p\"" + // Unlike crash tracking smoke test, keep the default delay; otherwise, otherwise other tests will fail + // ret += "-Ddd.dogstatsd.start-delay=0" + } ret as String[] } + static String getScriptExtension() { + return Platform.isWindows() ? "bat" : "sh" + } + def inferServiceName() { true } diff --git a/dd-smoke-tests/src/main/groovy/datadog/smoketest/ProcessManager.groovy b/dd-smoke-tests/src/main/groovy/datadog/smoketest/ProcessManager.groovy index 3240620fd70..f2993c256f5 100644 --- a/dd-smoke-tests/src/main/groovy/datadog/smoketest/ProcessManager.groovy +++ b/dd-smoke-tests/src/main/groovy/datadog/smoketest/ProcessManager.groovy @@ -196,6 +196,18 @@ abstract class ProcessManager extends Specification { outputThreads.captureOutput(p, new File(logFilePaths[idx])) } testedProcess = numberOfProcesses == 1 ? testedProcesses[0] : null + + + (0.. + def curProc = testedProcesses[idx] + + if ( !curProc.isAlive() && curProc.exitValue() != 0 ) { + def exitCode = curProc.exitValue() + def logFile = logFilePaths[idx] + + throw new RuntimeException("Process exited abormally - exitCode:${exitCode}; logFile=${logFile}") + } + } } String javaPath() {