From 949ba888a832fe40b9ed34d5033e14ee1ba5d178 Mon Sep 17 00:00:00 2001 From: Xinyi Yu Date: Tue, 13 Aug 2024 14:32:33 -0700 Subject: [PATCH] [SPARK-49216][CORE] Fix to not log message context with explicitly LogEntry constructed when Structured Logging conf is off ### What changes were proposed in this pull request? To use structured logging developers usually do ```scala logInfo(log"Lost executor ${MDC(LogKeys.EXECUTOR_ID, "1")}."), ``` When the structured logging conf is off, the context is not logged. However if explicitly constructing the LogEntry and calling the public API, ```scala logInfo( MessageWithContext( "Lost executor 1.", new java.util.HashMap[String, String] { put(LogKeys.EXECUTOR_ID.name, "1") } ) ) ``` the conf does not take effect and even when it is off, the context is still logged. This PR creates a fix for both cases to observe the conf, by toggling in the `withLogContext` function. This PR also adds a extra small test on explicitly setting MDC. ### Why are the changes needed? To keep consistency between APIs. ### Does this PR introduce _any_ user-facing change? No. ### How was this patch tested? Added new unit tests. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47732 from anchovYu/structured-logging-conf. Authored-by: Xinyi Yu Signed-off-by: Gengliang Wang --- .../org/apache/spark/internal/Logging.scala | 10 ++++- .../spark/util/StructuredLoggingSuite.scala | 40 ++++++++++++++++++- 2 files changed, 47 insertions(+), 3 deletions(-) diff --git a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala index 8eea9b44da26d..7ffaef0855fd1 100644 --- a/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala +++ b/common/utils/src/main/scala/org/apache/spark/internal/Logging.scala @@ -165,11 +165,17 @@ trait Logging { } protected def withLogContext(context: java.util.HashMap[String, String])(body: => Unit): Unit = { - val threadContext = CloseableThreadContext.putAll(context) + // put into thread context only when structured logging is enabled + val closeableThreadContextOpt = if (Logging.isStructuredLoggingEnabled) { + Some(CloseableThreadContext.putAll(context)) + } else { + None + } + try { body } finally { - threadContext.close() + closeableThreadContextOpt.foreach(_.close()) } } diff --git a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala index b3e103f46337c..48951c2084f17 100644 --- a/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala +++ b/common/utils/src/test/scala/org/apache/spark/util/StructuredLoggingSuite.scala @@ -40,7 +40,7 @@ trait LoggingSuiteBase } // Return the newly added log contents in the log file after executing the function `f` - private def captureLogOutput(f: () => Unit): String = { + protected def captureLogOutput(f: () => Unit): String = { val content = if (logFile.exists()) { Files.readString(logFile.toPath) } else { @@ -438,6 +438,44 @@ class StructuredLoggingSuite extends LoggingSuiteBase { assert((log"\r" + log"\n" + log"\t" + log"\b").message == "\r\n\t\b") assert((log"\r${MDC(LogKeys.EXECUTOR_ID, 1)}\n".message == "\r1\n")) } + + test("disabled structured logging won't log context") { + Logging.disableStructuredLogging() + val expectedPatternWithoutContext = compactAndToRegexPattern( + s""" + { + "ts": "", + "level": "INFO", + "msg": "Lost executor 1.", + "logger": "$className" + }""") + + Seq( + () => logInfo(log"Lost executor ${MDC(LogKeys.EXECUTOR_ID, "1")}."), + () => logInfo( // blocked when explicitly constructing the MessageWithContext + MessageWithContext( + "Lost executor 1.", + new java.util.HashMap[String, String] { put(LogKeys.EXECUTOR_ID.name, "1") } + ) + ) + ).foreach { f => + val logOutput = captureLogOutput(f) + assert(expectedPatternWithoutContext.r.matches(logOutput)) + } + Logging.enableStructuredLogging() + } + + test("setting to MDC gets logged") { + val mdcPattern = s""""${LogKeys.DATA.name}":"some-data"""" + + org.slf4j.MDC.put(LogKeys.DATA.name, "some-data") + val logOutputWithMDCSet = captureLogOutput(() => logInfo(msgWithMDC)) + assert(mdcPattern.r.findFirstIn(logOutputWithMDCSet).isDefined) + + org.slf4j.MDC.remove(LogKeys.DATA.name) + val logOutputWithoutMDCSet = captureLogOutput(() => logInfo(msgWithMDC)) + assert(mdcPattern.r.findFirstIn(logOutputWithoutMDCSet).isEmpty) + } } object CustomLogKeys {