[KYUUBI #6413] SPARK_HOME detection supports Spark 4

# Description

When `SPARK_HOME` is not set explicitly, the Kyuubi server can detect it from the Spark distribution directory name based on the Scala version, but the existing matching rules do not cover Spark 4.

This PR enhances the `SPARK_HOME` detection logic so that it recognizes both Spark 3 and Spark 4 distributions.

This detection logic is mainly used for testing purposes; the change does not affect users who configure `SPARK_HOME` in `kyuubi-env.sh`.
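
For context, here is a minimal, self-contained sketch of the updated detection filter. The regex patterns and the `FileFilter` logic mirror the patch below; the object name, the `/opt` scan directory, and the example directory names are purely illustrative:

```scala
import java.io.{File, FileFilter}

object SparkHomeFilterSketch {
  // Patterns from this patch (Scala 2.13 build): Spark 3 tarball names carry a
  // "-scala2.13" suffix, while Spark 4 is Scala 2.13 only and its tarball name
  // may carry a qualifier such as "-preview1".
  val SPARK3_HOME_REGEX_SCALA_213 =
    """^spark-3\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala2\.13$""".r
  val SPARK4_HOME_REGEX_SCALA_213 =
    """^spark-4\.\d+\.\d+(-\w*)?-bin-hadoop\d(\.\d+)?+$""".r

  // A directory counts as a candidate SPARK_HOME if any pattern matches its name.
  val engineHomeDirFilter: FileFilter = file => {
    val patterns = Seq(SPARK3_HOME_REGEX_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
    file.isDirectory && patterns.exists(_.findFirstMatchIn(file.getName).isDefined)
  }

  def main(args: Array[String]): Unit = {
    // The scan directory here is illustrative only.
    val candidates = Option(new File("/opt").listFiles(engineHomeDirFilter)).toSeq.flatten
    // Would list e.g. "spark-3.5.0-bin-hadoop3-scala2.13" and "spark-4.0.0-bin-hadoop3".
    candidates.foreach(dir => println(dir.getName))
  }
}
```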

## Types of changes

- [ ] Bugfix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing functionality to change)

## Test Plan

#### Related Unit Tests

- `SparkProcessBuilderSuite`

---

# Checklist 📝

- [x] This patch was not authored or co-authored using [Generative Tooling](https://www.apache.org/legal/generative-tooling.html)

**Be nice. Be informative.**

Closes #6413 from pan3793/spark4-home.

Closes #6413

20e71fd [Cheng Pan] SPARK_HOME detection supports Spark 4

Authored-by: Cheng Pan <chengpan@apache.org>
Signed-off-by: Cheng Pan <chengpan@apache.org>
pan3793 committed May 23, 2024
1 parent a95ff12 commit b89c185
Showing 2 changed files with 38 additions and 28 deletions.

@@ -117,11 +117,11 @@ class SparkProcessBuilder(
   }
 
   override protected lazy val engineHomeDirFilter: FileFilter = file => {
-    val r = SCALA_COMPILE_VERSION match {
-      case "2.12" => SPARK_HOME_REGEX_SCALA_212
-      case "2.13" => SPARK_HOME_REGEX_SCALA_213
+    val patterns = SCALA_COMPILE_VERSION match {
+      case "2.12" => Seq(SPARK3_HOME_REGEX_SCALA_212)
+      case "2.13" => Seq(SPARK3_HOME_REGEX_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
     }
-    file.isDirectory && r.findFirstMatchIn(file.getName).isDefined
+    file.isDirectory && patterns.exists(_.findFirstMatchIn(file.getName).isDefined)
   }
 
   override protected[kyuubi] lazy val commands: Iterable[String] = {

@@ -364,11 +364,14 @@ object SparkProcessBuilder {
   final private val SPARK_CONF_FILE_NAME = "spark-defaults.conf"
 
   final private[kyuubi] val SPARK_CORE_SCALA_VERSION_REGEX =
-    """^spark-core_(\d\.\d+).*.jar$""".r
+    """^spark-core_(\d\.\d+)-.*\.jar$""".r
 
-  final private[kyuubi] val SPARK_HOME_REGEX_SCALA_212 =
-    """^spark-\d+\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r
+  final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_212 =
+    """^spark-3\.\d+\.\d+-bin-hadoop\d+(\.\d+)?$""".r
 
-  final private[kyuubi] val SPARK_HOME_REGEX_SCALA_213 =
-    """^spark-\d+\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala\d+(\.\d+)?$""".r
+  final private[kyuubi] val SPARK3_HOME_REGEX_SCALA_213 =
+    """^spark-3\.\d+\.\d+-bin-hadoop\d(\.\d+)?+-scala2\.13$""".r
+
+  final private[kyuubi] val SPARK4_HOME_REGEX_SCALA_213 =
+    """^spark-4\.\d+\.\d+(-\w*)?-bin-hadoop\d(\.\d+)?+$""".r
 }
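
A side effect worth noting: the patch also tightens `SPARK_CORE_SCALA_VERSION_REGEX`, so the captured Scala binary version must now be followed by a `-` and the file name must end with `.jar`. Below is a minimal sketch of the effect, using jar names that appear in the test suite diff; the object name is for illustration only:

```scala
object SparkCoreScalaVersionSketch extends App {
  // Tightened pattern from this patch.
  val SPARK_CORE_SCALA_VERSION_REGEX = """^spark-core_(\d\.\d+)-.*\.jar$""".r

  // Still matches; the captured group is the Scala binary version.
  println(SPARK_CORE_SCALA_VERSION_REGEX
    .findFirstMatchIn("spark-core_2.13-4.0.0.jar").map(_.group(1))) // Some(2.13)

  // No longer matches: "2.13.2" is not a Scala binary version, so the name is rejected.
  println(SPARK_CORE_SCALA_VERSION_REGEX
    .findFirstMatchIn("spark-core_2.13.2-3.5.0.jar").map(_.group(1))) // None
}
```
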
@@ -403,36 +403,43 @@ class SparkProcessBuilderSuite extends KerberizedTestHelper with MockitoSugar {
       "spark-core_2.13-3.5.0-abc-20230921.jar",
       "spark-core_2.13-3.5.0-xyz-1.2.3.jar",
       "spark-core_2.13-3.5.0.1.jar",
-      "spark-core_2.13.2-3.5.0.jar").foreach { f =>
+      "spark-core_2.13-4.0.0-preview1.jar",
+      "spark-core_2.13-4.0.0.jar").foreach { f =>
       assertResult("2.13")(builder.extractSparkCoreScalaVersion(Seq(f)))
     }
 
     Seq(
       "spark-dummy_2.13-3.5.0.jar",
       "spark-core_2.13-3.5.0.1.zip",
-      "yummy-spark-core_2.13-3.5.0.jar").foreach { f =>
+      "yummy-spark-core_2.13-3.5.0.jar",
+      "spark-core_2.13.2-3.5.0.jar").foreach { f =>
       assertThrows[KyuubiException](builder.extractSparkCoreScalaVersion(Seq(f)))
     }
   }
 
   test("match scala version of spark home") {
-    SCALA_COMPILE_VERSION match {
-      case "2.12" => Seq(
-          "spark-3.2.4-bin-hadoop3.2",
-          "spark-3.2.4-bin-hadoop2.7",
-          "spark-3.4.1-bin-hadoop3")
-          .foreach { sparkHome =>
-            assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212)
-            assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213)
-          }
-      case "2.13" => Seq(
-          "spark-3.2.4-bin-hadoop3.2-scala2.13",
-          "spark-3.4.1-bin-hadoop3-scala2.13",
-          "spark-3.5.0-bin-hadoop3-scala2.13")
-          .foreach { sparkHome =>
-            assertMatches(sparkHome, SPARK_HOME_REGEX_SCALA_213)
-            assertNotMatches(sparkHome, SPARK_HOME_REGEX_SCALA_212)
-          }
-    }
+    Seq(
+      "spark-3.2.4-bin-hadoop3.2",
+      "spark-3.2.4-bin-hadoop2.7",
+      "spark-3.4.1-bin-hadoop3").foreach { SPARK3_HOME_SCALA_212 =>
+      assertMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_212)
+      assertNotMatches(SPARK3_HOME_SCALA_212, SPARK3_HOME_REGEX_SCALA_213)
+      assertNotMatches(SPARK3_HOME_SCALA_212, SPARK4_HOME_REGEX_SCALA_213)
+    }
+    Seq(
+      "spark-3.2.4-bin-hadoop3.2-scala2.13",
+      "spark-3.4.1-bin-hadoop3-scala2.13",
+      "spark-3.5.0-bin-hadoop3-scala2.13").foreach { SPARK3_HOME_SCALA_213 =>
+      assertMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213)
+      assertNotMatches(SPARK3_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212)
+      assertNotMatches(SPARK3_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
+    }
+    Seq(
+      "spark-4.0.0-preview1-bin-hadoop3",
+      "spark-4.0.0-bin-hadoop3").foreach { SPARK4_HOME_SCALA_213 =>
+      assertMatches(SPARK4_HOME_SCALA_213, SPARK4_HOME_REGEX_SCALA_213)
+      assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_212)
+      assertNotMatches(SPARK4_HOME_SCALA_213, SPARK3_HOME_REGEX_SCALA_213)
+    }
   }
