From c7ba65b94c2ab02f199e161d8655cfdeb2432618 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Thu, 17 May 2018 10:08:12 +0200 Subject: [PATCH 1/3] add version information in case of crash of native ML process --- .../ml/job/process/logging/CppLogMessageHandler.java | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java index ddafc36416b65..93b4ff795e964 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java @@ -283,6 +283,15 @@ private void parseMessage(XContent xContent, BytesReference bytesRef) { if (upstreamMessage.contains("bad_alloc")) { upstreamMessage += ", process ran out of memory."; } + + // add version information, so it's conveniently next to the crash log + upstreamMessage += ", version: "; + try { + upstreamMessage += getCppCopyright(Duration.ofMillis(10)); + } catch (TimeoutException timeoutException) { + upstreamMessage += "failed to retrieve"; + } + storeError(upstreamMessage); seenFatalError = true; } catch (IOException e) { From 3c0ea7ac60b605aac53ffa571791017c5ea900f6 Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Thu, 17 May 2018 11:17:05 +0200 Subject: [PATCH 2/3] refactor version information retrieval and use it for crash logging --- .../ml/job/process/NativeController.java | 17 +--------- .../process/logging/CppLogMessageHandler.java | 32 +++++++++++++++++-- 2 files changed, 30 insertions(+), 19 deletions(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/NativeController.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/NativeController.java index a45f44c227657..43c3f4825ddf3 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/NativeController.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/NativeController.java @@ -22,8 +22,6 @@ import java.util.List; import java.util.Map; import java.util.concurrent.TimeoutException; -import java.util.regex.Matcher; -import java.util.regex.Pattern; /** @@ -84,20 +82,7 @@ public long getPid() throws TimeoutException { } public Map getNativeCodeInfo() throws TimeoutException { - String copyrightMessage = cppLogHandler.getCppCopyright(CONTROLLER_CONNECT_TIMEOUT); - Matcher matcher = Pattern.compile("Version (.+) \\(Build ([^)]+)\\) Copyright ").matcher(copyrightMessage); - if (matcher.find()) { - Map info = new HashMap<>(2); - info.put("version", matcher.group(1)); - info.put("build_hash", matcher.group(2)); - return info; - } else { - // If this happens it probably means someone has changed the format in lib/ver/CBuildInfo.cc - // in the machine-learning-cpp repo without changing the pattern above to match - String msg = "Unexpected native controller process copyright format: " + copyrightMessage; - LOGGER.error(msg); - throw new ElasticsearchException(msg); - } + return cppLogHandler.getNativeCodeInfo(CONTROLLER_CONNECT_TIMEOUT); } public void startProcess(List command) throws IOException { diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java index 93b4ff795e964..4f9cf6485d535 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java @@ -8,7 +8,7 @@ import org.apache.logging.log4j.Level; import org.apache.logging.log4j.Logger; import org.apache.logging.log4j.message.ParameterizedMessage; -import org.elasticsearch.common.ParsingException; +import org.elasticsearch.ElasticsearchException; import org.elasticsearch.common.Strings; import org.elasticsearch.common.bytes.BytesArray; import org.elasticsearch.common.bytes.BytesReference; @@ -30,10 +30,15 @@ import java.time.Instant; import java.time.temporal.ChronoUnit; import java.util.Deque; +import java.util.HashMap; +import java.util.Locale; +import java.util.Map; import java.util.Objects; import java.util.concurrent.CountDownLatch; import java.util.concurrent.TimeUnit; import java.util.concurrent.TimeoutException; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * Handle a stream of C++ log messages that arrive via a named pipe in JSON format. @@ -181,6 +186,26 @@ public String getCppCopyright(Duration timeout) throws TimeoutException { return cppCopyright; } + /** + * Extracts version information from the copyright string which assumes a certain format. + */ + public Map getNativeCodeInfo(Duration timeout) throws TimeoutException { + String copyrightMessage = getCppCopyright(timeout); + Matcher matcher = Pattern.compile("Version (.+) \\(Build ([^)]+)\\) Copyright ").matcher(copyrightMessage); + if (matcher.find()) { + Map info = new HashMap<>(2); + info.put("version", matcher.group(1)); + info.put("build_hash", matcher.group(2)); + return info; + } else { + // If this happens it probably means someone has changed the format in lib/ver/CBuildInfo.cc + // in the machine-learning-cpp repo without changing the pattern above to match + String msg = "Unexpected native process copyright format: " + copyrightMessage; + LOGGER.error(msg); + throw new ElasticsearchException(msg); + } + } + /** * Expected to be called very infrequently. */ @@ -281,13 +306,14 @@ private void parseMessage(XContent xContent, BytesReference bytesRef) { } catch (XContentParseException e) { String upstreamMessage = "Fatal error: '" + bytesRef.utf8ToString() + "'"; if (upstreamMessage.contains("bad_alloc")) { - upstreamMessage += ", process ran out of memory."; + upstreamMessage += ", process ran out of memory"; } // add version information, so it's conveniently next to the crash log upstreamMessage += ", version: "; try { - upstreamMessage += getCppCopyright(Duration.ofMillis(10)); + Map versionInfo = getNativeCodeInfo(Duration.ofMillis(10)); + upstreamMessage += String.format(Locale.ROOT, "%s (build %s)", versionInfo.get("version"), versionInfo.get("build_hash")); } catch (TimeoutException timeoutException) { upstreamMessage += "failed to retrieve"; } From aa9e906495a9f968fa4ef9c8f952a2cf2a0b637d Mon Sep 17 00:00:00 2001 From: Hendrik Muhs Date: Thu, 17 May 2018 11:36:28 +0200 Subject: [PATCH 3/3] improve doc string --- .../xpack/ml/job/process/logging/CppLogMessageHandler.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java index 4f9cf6485d535..af0f199dd0c58 100644 --- a/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java +++ b/x-pack/plugin/ml/src/main/java/org/elasticsearch/xpack/ml/job/process/logging/CppLogMessageHandler.java @@ -199,7 +199,7 @@ public Map getNativeCodeInfo(Duration timeout) throws TimeoutExc return info; } else { // If this happens it probably means someone has changed the format in lib/ver/CBuildInfo.cc - // in the machine-learning-cpp repo without changing the pattern above to match + // in the ml-cpp repo without changing the pattern above to match String msg = "Unexpected native process copyright format: " + copyrightMessage; LOGGER.error(msg); throw new ElasticsearchException(msg);