From 991cb75bf0f2c79ef4c717f7e4b63ddb20b697ee Mon Sep 17 00:00:00 2001 From: Dongjoon Hyun Date: Mon, 12 Aug 2024 10:26:31 -0700 Subject: [PATCH] [SPARK-49197][CORE] Redact `Spark Command` output in `launcher` module ### What changes were proposed in this pull request? This PR aims to redact `Spark Command` output in `launcher` module. ### Why are the changes needed? When `launcher` module shows `Spark Command`, there is no redaction. Although Spark Cluster is supposed to be in a secure environment, this could be collected by a centralized log system. We need to do a proper redaction. **EXAMPLE (Spark History Server)** ``` $ SPARK_NO_DAEMONIZE=1 \ SPARK_HISTORY_OPTS="-Dspark.ui.filters=org.apache.spark.ui.JWSFilter -Dspark.org.apache.spark.ui.JWSFilter.param.secretKey=VmlzaXQgaHR0cHM6Ly9zcGFyay5hcGFjaGUub3JnIHRvIGRvd25sb2FkIEFwYWNoZSBTcGFyay4=" \ sbin/start-history-server.sh ``` **BEFORE** ``` Spark Command: /Users/dongjoon/.jenv/versions/17/bin/java -cp /Users/dongjoon/APACHE/spark-merge/conf/:/Users/dongjoon/APACHE/spark-merge/assembly/target/scala-2.13/jars/slf4j-api-2.0.14.jar:/Users/dongjoon/APACHE/spark-merge/assembly/target/scala-2.13/jars/* \ -Dspark.ui.filters=org.apache.spark.ui.JWSFilter \ -Dspark.org.apache.spark.ui.JWSFilter.param.secretKey=VmlzaXQgaHR0cHM6Ly9zcGFyay5hcGFjaGUub3JnIHRvIGRvd25sb2FkIEFwYWNoZSBTcGFyay4= \ -Xmx1g \ org.apache.spark.deploy.history.HistoryServer ``` **AFTER** ``` ... Spark Command: /Users/dongjoon/.jenv/versions/17/bin/java -cp /Users/dongjoon/APACHE/spark-merge/conf/:/Users/dongjoon/APACHE/spark-merge/assembly/target/scala-2.13/jars/slf4j-api-2.0.14.jar:/Users/dongjoon/APACHE/spark-merge/assembly/target/scala-2.13/jars/* \ -Dspark.ui.filters=org.apache.spark.ui.JWSFilter \ -Dspark.org.apache.spark.ui.JWSFilter.param.secretKey=*********(redacted) \ -Xmx1g \ org.apache.spark.deploy.history.HistoryServer ``` ### Does this PR introduce _any_ user-facing change? This only changes the log messages during startup. ### How was this patch tested? Pass the CIs with newly added test case. ### Was this patch authored or co-authored using generative AI tooling? No. Closes #47704 from dongjoon-hyun/SPARK-49197. Authored-by: Dongjoon Hyun Signed-off-by: Dongjoon Hyun (cherry picked from commit 93cf355ab05d8c9e82e23b8ae065b2f462489e4e) Signed-off-by: Dongjoon Hyun --- .../spark/launcher/CommandBuilderUtils.java | 27 +++++++++++++++++++ .../java/org/apache/spark/launcher/Main.java | 2 +- .../launcher/CommandBuilderUtilsSuite.java | 10 +++++++ 3 files changed, 38 insertions(+), 1 deletion(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java index 172fb8c560876..ff729cd1cb6dc 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java +++ b/launcher/src/main/java/org/apache/spark/launcher/CommandBuilderUtils.java @@ -21,6 +21,9 @@ import java.util.ArrayList; import java.util.List; import java.util.Map; +import java.util.regex.Matcher; +import java.util.regex.Pattern; +import java.util.stream.Collectors; /** * Helper methods for command builders. @@ -30,6 +33,11 @@ class CommandBuilderUtils { static final String DEFAULT_MEM = "1g"; static final String DEFAULT_PROPERTIES_FILE = "spark-defaults.conf"; static final String ENV_SPARK_HOME = "SPARK_HOME"; + // This should be consistent with org.apache.spark.internal.config.SECRET_REDACTION_PATTERN + // We maintain this copy to avoid depending on `core` module. + static final String SECRET_REDACTION_PATTERN = "(?i)secret|password|token|access[.]?key"; + static final Pattern redactPattern = Pattern.compile(SECRET_REDACTION_PATTERN); + static final Pattern keyValuePattern = Pattern.compile("-D(.+?)=(.+)"); /** Returns whether the given string is null or empty. */ static boolean isEmpty(String s) { @@ -328,4 +336,23 @@ static String findJarsDir(String sparkHome, String scalaVersion, boolean failIfN return libdir.getAbsolutePath(); } + /** + * Redact a command-line argument's value part which matches `-Dkey=value` pattern. + * Note that this should be consistent with `org.apache.spark.util.Utils.redactCommandLineArgs`. + */ + static List redactCommandLineArgs(List args) { + return args.stream().map(CommandBuilderUtils::redact).collect(Collectors.toList()); + } + + /** + * Redact a command-line argument's value part which matches `-Dkey=value` pattern. + */ + static String redact(String arg) { + Matcher m = keyValuePattern.matcher(arg); + if (m.find() && redactPattern.matcher(m.group(1)).find()) { + return String.format("-D%s=%s", m.group(1), "*********(redacted)"); + } else { + return arg; + } + } } diff --git a/launcher/src/main/java/org/apache/spark/launcher/Main.java b/launcher/src/main/java/org/apache/spark/launcher/Main.java index 6501fc1764c25..321fca0912704 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/Main.java +++ b/launcher/src/main/java/org/apache/spark/launcher/Main.java @@ -114,7 +114,7 @@ private static List buildCommand( boolean printLaunchCommand) throws IOException, IllegalArgumentException { List cmd = builder.buildCommand(env); if (printLaunchCommand) { - System.err.println("Spark Command: " + join(" ", cmd)); + System.err.println("Spark Command: " + join(" ", redactCommandLineArgs(cmd))); System.err.println("========================================"); } return cmd; diff --git a/launcher/src/test/java/org/apache/spark/launcher/CommandBuilderUtilsSuite.java b/launcher/src/test/java/org/apache/spark/launcher/CommandBuilderUtilsSuite.java index 46cdffc190d52..1b2c683880c25 100644 --- a/launcher/src/test/java/org/apache/spark/launcher/CommandBuilderUtilsSuite.java +++ b/launcher/src/test/java/org/apache/spark/launcher/CommandBuilderUtilsSuite.java @@ -68,6 +68,16 @@ public void testInvalidOptionStrings() { testInvalidOpt("'abcde"); } + @Test + public void testRedactCommandLineArgs() { + assertEquals(redact("secret"), "secret"); + assertEquals(redact("-Dk=v"), "-Dk=v"); + assertEquals(redact("-Dk=secret"), "-Dk=secret"); + assertEquals(redact("-DsecretKey=my-secret"), "-DsecretKey=*********(redacted)"); + assertEquals(redactCommandLineArgs(Arrays.asList("-DsecretKey=my-secret")), + Arrays.asList("-DsecretKey=*********(redacted)")); + } + @Test public void testWindowsBatchQuoting() { assertEquals("abc", quoteForBatchScript("abc"));