Skip to content

Commit

Permalink
Cleaned up build.sbt; cleaned up test
Browse files Browse the repository at this point in the history
  • Loading branch information
scottsand-db committed Sep 11, 2023
1 parent df17d48 commit c918220
Show file tree
Hide file tree
Showing 2 changed files with 66 additions and 21 deletions.
45 changes: 30 additions & 15 deletions build.sbt
Original file line number Diff line number Diff line change
Expand Up @@ -322,20 +322,35 @@ val icebergSparkRuntimeArtifactName = {
}

// Test-only project that audits the contents of the delta-iceberg assembly jar.
lazy val testDeltaIcebergJar = (project in file("testDeltaIcebergJar"))
  // delta-iceberg depends on delta-spark! So, we need to include it during our test.
  .dependsOn(spark % "test")
  .settings(
    name := "test-delta-iceberg-jar",
    commonSettings,
    skipReleaseSettings,
    exportJars := true,
    // Put the assembled delta-iceberg jar itself on the classpath so the suite can open it.
    Compile / unmanagedJars += (iceberg / assembly).value,
    libraryDependencies ++= Seq(
      "org.apache.hadoop" % "hadoop-client" % hadoopVersion,
      "org.scalatest" %% "scalatest" % scalaTestVersion % "test",
      // Test scope only: spark-core is needed to run the audit suite but must not
      // leak into this project's compile classpath.
      // (The duplicated, un-scoped spark-core entry left over from the diff is removed —
      // two adjacent entries with no comma would not even parse.)
      "org.apache.spark" %% "spark-core" % sparkVersion % "test"
    )
  )

// Prefixes (under org/apache/spark) of the class paths that belong in the
// delta-iceberg assembly jar; anything else from that package tree is discarded.
val deltaIcebergSparkIncludePrefixes = {
  // The icebergShaded package is entirely ours — keep all of it.
  val icebergShadedPackage = Seq("org/apache/spark/sql/delta/icebergShaded")

  // From the convert package, keep only this project's classes. Listing them
  // explicitly keeps delta-spark classes such as
  // org/apache/spark/sql/delta/commands/convert/ConvertTargetFile.class out of the jar.
  val convertClasses = Seq(
    "org/apache/spark/sql/delta/commands/convert/IcebergFileManifest",
    "org/apache/spark/sql/delta/commands/convert/IcebergSchemaUtils",
    "org/apache/spark/sql/delta/commands/convert/IcebergTable"
  )

  icebergShadedPackage ++ convertClasses
}

// Build using: build/sbt clean icebergShaded/compile iceberg/compile
// It will fail the first time, just re-run it.
// scalastyle:off println
lazy val iceberg = (project in file("iceberg"))
.dependsOn(spark % "compile->compile;test->test;provided->provided")
.settings (
Expand All @@ -358,7 +373,7 @@ lazy val iceberg = (project in file("iceberg"))
assembly / test := {},
assembly / assemblyExcludedJars := {
// Note: the input here is only `libraryDependencies` jars, not `.dependsOn(_)` jars.
val includes = Seq(
val allowedJars = Seq(
"iceberg-shaded_2.12-3.0.0-SNAPSHOT.jar",
"scala-library-2.12.15.jar",
"scala-collection-compat_2.12-2.1.1.jar",
Expand All @@ -372,10 +387,8 @@ lazy val iceberg = (project in file("iceberg"))

// Return `true` when we want the jar `f` to be excluded from the assembly jar
cp.filter { f =>
val doExclude = !includes.contains(f.data.getName)
// scalastyle:off println
println(s"${f.data.getName} :: exclude? $doExclude")
// scalastyle:on println
val doExclude = !allowedJars.contains(f.data.getName)
println(s"Excluding jar: ${f.data.getName} ? $doExclude")
doExclude
}
},
Expand All @@ -387,22 +400,24 @@ lazy val iceberg = (project in file("iceberg"))
// - delta-storage will bring in classes: io/delta/storage
// - delta-spark will bring in classes: io/delta/exceptions/, io/delta/implicits,
// io/delta/package, io/delta/sql, io/delta/tables,
println(s"BBBB io/delta/$xs")
println(s"Discarding class: io/delta/${xs.mkString("/")}")
MergeStrategy.discard
case PathList("com", "databricks", xs @ _*) =>
// delta-spark will bring in com/databricks/spark/util
println(s"Discarding class: com/databricks/${xs.mkString("/")}")
MergeStrategy.discard
case PathList("org", "apache", "spark", xs @ _*) =>
val includes = Seq("sql/delta/icebergShaded", "org.apache.spark.sql.delta.commands.convert")
val includes = Set(
List("sql", "delta", "icebergShaded"),
List("sql", "delta", "commands", "convert")
)
println(s"------> ${xs.mkString("/")}")
case PathList("org", "apache", "spark", xs @ _*)
if !deltaIcebergSparkIncludePrefixes.exists { prefix =>
s"org/apache/spark/${xs.mkString("/")}".startsWith(prefix) } =>
println(s"Discarding class: org/apache/spark/${xs.mkString("/")}")
MergeStrategy.discard
case x =>
println(s"AAAA $x")
println(s"Including class: $x")
(assembly / assemblyMergeStrategy).value(x)
},
assemblyPackageScala / assembleArtifact := false
)
// scalastyle:on println

lazy val generateIcebergJarsTask = TaskKey[Unit]("generateIcebergJars", "Generate Iceberg JARs")

Expand Down
42 changes: 36 additions & 6 deletions testDeltaIcebergJar/src/test/scala/JarSuite.scala
Original file line number Diff line number Diff line change
Expand Up @@ -25,23 +25,53 @@ import scala.collection.JavaConverters._
import org.scalatest.funsuite.AnyFunSuite

class JarSuite extends AnyFunSuite {

val allowedClassPrefixes = Seq(
// e.g. shadedForDelta/org/apache/iceberg/BaseTable.class
"shadedForDelta/",
// e.g. scala/collection/compat/immutable/ArraySeq.class
// e.g. scala/jdk/CollectionConverters.class
"scala/",
// e.g. org/apache/spark/sql/delta/icebergShaded/IcebergTransactionUtils.class
"org/apache/spark/sql/delta/icebergShaded/",
// We explicitly include all the /delta/commands/convert classes we want, to ensure we don't
// accidentally pull in some from delta-spark package.
"org/apache/spark/sql/delta/commands/convert/IcebergFileManifest",
"org/apache/spark/sql/delta/commands/convert/IcebergSchemaUtils",
"org/apache/spark/sql/delta/commands/convert/IcebergTable",
// e.g. org/apache/iceberg/transforms/IcebergPartitionUtil.class
"org/apache/iceberg/",
"com/github/benmanes/caffeine/"
)

test("audit files in assembly jar") {
// Step 1: load the jar (and make sure it exists)
// scalastyle:off classforname
val classUrl = Class.forName("org.apache.spark.sql.delta.icebergShaded.IcebergConverter").getResource("IcebergConverter.class")
// scalastyle:on classforname
assert(classUrl != null, "Could not find delta-iceberg jar")
println(classUrl)
val connection = classUrl.openConnection().asInstanceOf[JarURLConnection]

val url = connection.getJarFileURL()
val url = connection.getJarFileURL
val jarFile = new JarFile(new File(url.toURI))

// Step 2: Verify the JAR has the classes we want it to ahve
// Step 2: Verify the JAR has the classes we want it to have
try {
val set = jarFile.entries().asScala.filter(!_.isDirectory).map(_.toString).toSet
println(set.toList.sorted.mkString("\n"))
val jarClasses = jarFile
.entries()
.asScala
.filter(!_.isDirectory)
.map(_.toString)
.filter(_.endsWith(".class")) // let's ignore any .properties or META-INF files for now
.toSet

val prohibitedJarClasses = jarClasses
.filter { clazz => !allowedClassPrefixes.exists(prefix => clazz.startsWith(prefix)) }

if (prohibitedJarClasses.nonEmpty) {
throw new Exception(
s"Prohibited jar class(es) found:\n- ${prohibitedJarClasses.mkString("\n- ")}"
)
}
} finally {
jarFile.close()
}
Expand Down

0 comments on commit c918220

Please sign in to comment.