SPARK_HOME detection supports Spark 4 #6413

Closed · wants to merge 1 commit
@@ -117,11 +117,11 @@ class SparkProcessBuilder(
   }

   override protected lazy val engineHomeDirFilter: FileFilter = file => {
-    val r = SCALA_COMPILE_VERSION match {
-      case "2.12" => SPARK_HOME_REGEX_SCALA_212
-      case "2.13" => SPARK_HOME_REGEX_SCALA_213
+    val patterns = SCALA_COMPILE_VERSION match {
+      case "2.12" => Seq(SPARK3_HOME_REGEX_SCALA_212)
+      case "2.13" => Seq(SPARK3_HOME_REGEX_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
     }
-    file.isDirectory && r.findFirstMatchIn(file.getName).isDefined
+    file.isDirectory && patterns.exists(_.findFirstMatchIn(file.getName).isDefined)
   }

   override protected[kyuubi] lazy val commands: Iterable[String] = {
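
For illustration, here is a minimal, self-contained sketch of how a filter of this shape scans a distribution directory for candidate SPARK_HOME dirs. The `externals` path, the object name, and the printing are assumptions for the demo; only the filter logic mirrors the diff above.

```scala
import java.io.{File, FileFilter}

// Sketch only: the Seq-of-regexes filter mirrors the diff above; the
// "externals" directory and the println wiring are illustrative assumptions.
object EngineHomeScanDemo extends App {
  val patterns = Seq(
    """^spark-3\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala2\.13$""".r,
    """^spark-4\.\d+\.\d+(-\w*)?-bin-hadoop\d(\.\d+)?+$""".r)

  // Same shape as engineHomeDirFilter: accept a directory whose name
  // matches any of the patterns for the compile-time Scala version.
  val filter: FileFilter = file =>
    file.isDirectory && patterns.exists(_.findFirstMatchIn(file.getName).isDefined)

  // listFiles returns null when the path does not exist, hence the Option wrap.
  val candidates = Option(new File("externals").listFiles(filter)).toSeq.flatten
  candidates.foreach(dir => println(s"candidate SPARK_HOME: ${dir.getAbsolutePath}"))
}
```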
@@ -364,11 +364,14 @@ object SparkProcessBuilder {
   final private val SPARK_CONF_FILE_NAME = "spark-defaults.conf"

   final private[kyuubi] val SPARK_CORE_SCALA_VERSION_REGEX =
-    """^spark-core_(\d\.\d+).*.jar$""".r
+    """^spark-core_(\d\.\d+)-.*\.jar$""".r

-  final private[kyuubi] val SPARK_HOME_REGEX_SCALA_212 =
-    """^spark-\d+\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r
+  final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_212 =
+    """^spark-3\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r

-  final private[kyuubi] val SPARK_HOME_REGEX_SCALA_213 =
-    """^spark-\d+\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala\d+(\.\d+)?$""".r
+  final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_213 =
+    """^spark-3\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala2\.13$""".r
+
+  final private[kyuubi] val SPARK4_HOME_REGEX_SCALA_213 =
+    """^spark-4\.\d+\.\d+(-\w*)?-bin-hadoop\d(\.\d+)?+$""".r
Member Author:
SPARK_HOME detection should be conservative since it is only used for testing purposes, and users/developers can always set SPARK_HOME explicitly to override it. While SPARK_CORE_SCALA_VERSION_REGEX is used to detect the Scala version, it should also account for vendor-provided Spark distributions.

}
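
To make the comment above concrete, a small standalone sketch (not part of the PR) that runs the new patterns against typical tarball directory names. The names and output format are illustrative; the vendor-style name shows the conservative no-match case, where setting SPARK_HOME explicitly is the way out.

```scala
// Sketch: classify directory names with the regexes defined in the diff above.
object SparkHomeRegexDemo extends App {
  val spark3Scala212 = """^spark-3\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r
  val spark3Scala213 = """^spark-3\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala2\.13$""".r
  val spark4Scala213 = """^spark-4\.\d+\.\d+(-\w*)?-bin-hadoop\d(\.\d+)?+$""".r

  val names = Seq(
    "spark-3.5.0-bin-hadoop3",           // Spark 3 default build (Scala 2.12)
    "spark-3.5.0-bin-hadoop3-scala2.13", // Spark 3 Scala 2.13 build
    "spark-4.0.0-preview1-bin-hadoop3",  // Spark 4 preview (Scala 2.13 only)
    "vendor-spark-3.5.0-bin-custom")     // vendor layout: deliberately no match

  for (n <- names) {
    val hit = Seq(spark3Scala212, spark3Scala213, spark4Scala213)
      .find(_.findFirstMatchIn(n).isDefined)
    println(s"$n -> ${hit.fold("no match (set SPARK_HOME explicitly)")(_.toString)}")
  }
}
```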
@@ -403,36 +403,43 @@ class SparkProcessBuilderSuite extends KerberizedTestHelper with MockitoSugar {
       "spark-core_2.13-3.5.0-abc-20230921.jar",
       "spark-core_2.13-3.5.0-xyz-1.2.3.jar",
       "spark-core_2.13-3.5.0.1.jar",
-      "spark-core_2.13.2-3.5.0.jar").foreach { f =>
Member Author: This is actually a negative case; "spark-core_2.13.2-3.5.0.jar" must no longer match the tightened regex (see the sketch after this diff).
"spark-core_2.13-4.0.0-preview1.jar",
"spark-core_2.13-4.0.0.jar").foreach { f =>
assertResult("2.13")(builder.extractSparkCoreScalaVersion(Seq(f)))
}

Seq(
"spark-dummy_2.13-3.5.0.jar",
"spark-core_2.13-3.5.0.1.zip",
"yummy-spark-core_2.13-3.5.0.jar").foreach { f =>
"yummy-spark-core_2.13-3.5.0.jar",
"spark-core_2.13.2-3.5.0.jar").foreach { f =>
assertThrows[KyuubiException](builder.extractSparkCoreScalaVersion(Seq(f)))
}
}

test("match scala version of spark home") {
SCALA_COMPILE_VERSION match {
case "2.12" => Seq(
"spark-3.2.4-bin-hadoop3.2",
"spark-3.2.4-bin-hadoop2.7",
"spark-3.4.1-bin-hadoop3")
.foreach { sparkHome =>
assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212)
assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213)
}
case "2.13" => Seq(
"spark-3.2.4-bin-hadoop3.2-scala2.13",
"spark-3.4.1-bin-hadoop3-scala2.13",
"spark-3.5.0-bin-hadoop3-scala2.13")
.foreach { sparkHome =>
assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213)
assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212)
}
Seq(
"spark-3.2.4-bin-hadoop3.2",
"spark-3.2.4-bin-hadoop2.7",
"spark-3.4.1-bin-hadoop3").foreach { SPARK3_HOME_SCALA_212 =>
assertMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_212)
assertNotMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_213)
assertNotMatches(SPARK3_HOME_SCALA_212, SPARK4_HOME_REGEX_SCALA_213)
}
Seq(
"spark-3.2.4-bin-hadoop3.2-scala2.13",
"spark-3.4.1-bin-hadoop3-scala2.13",
"spark-3.5.0-bin-hadoop3-scala2.13").foreach { SPARK3_HOME_SCALA_213 =>
assertMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213)
assertNotMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212)
assertNotMatches(SPARK3_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
}
Seq(
"spark-4.0.0-preview1-bin-hadoop3",
"spark-4.0.0-bin-hadoop3").foreach { SPARK4_HOME_SCALA_213 =>
assertMatches(SPARK4_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212)
assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213)
}
}
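
The negative case called out above follows from the tightened SPARK_CORE_SCALA_VERSION_REGEX: a hyphen must immediately follow the Scala version, so "spark-core_2.13.2-3.5.0.jar" no longer parses. A minimal sketch; the helper name `scalaVersionOf` is assumed, modeled on `extractSparkCoreScalaVersion`:

```scala
// Sketch: Scala-version extraction with the tightened regex from the diff.
val sparkCoreScalaVersionRegex = """^spark-core_(\d\.\d+)-.*\.jar$""".r

def scalaVersionOf(jarName: String): Option[String] =
  sparkCoreScalaVersionRegex.findFirstMatchIn(jarName).map(_.group(1))

assert(scalaVersionOf("spark-core_2.13-4.0.0.jar").contains("2.13"))          // positive
assert(scalaVersionOf("spark-core_2.13-4.0.0-preview1.jar").contains("2.13")) // preview builds
assert(scalaVersionOf("spark-core_2.13.2-3.5.0.jar").isEmpty)                 // the negative case
assert(scalaVersionOf("yummy-spark-core_2.13-3.5.0.jar").isEmpty)             // anchored at ^
```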
