From 71180e35ca30a5e17bceab7ad8307f5a81b0366a Mon Sep 17 00:00:00 2001
From: Grigory Pomadchin
Date: Thu, 1 Jun 2023 18:29:25 -0400
Subject: [PATCH] build.sbt cleanup

---
 README.md                                     | 34 ++++----
 build.sbt                                     | 80 +++++-------------
 .../src/main/pre34/frameless/MapGroups.scala  | 14 ----
 .../main/spark-3.4+/frameless/MapGroups.scala | 21 +++++
 .../main/spark-3/frameless/MapGroups.scala    | 14 ++++
 .../main/spark34/frameless/MapGroups.scala    | 16 ----
 6 files changed, 70 insertions(+), 109 deletions(-)
 delete mode 100644 dataset/src/main/pre34/frameless/MapGroups.scala
 create mode 100644 dataset/src/main/spark-3.4+/frameless/MapGroups.scala
 create mode 100644 dataset/src/main/spark-3/frameless/MapGroups.scala
 delete mode 100644 dataset/src/main/spark34/frameless/MapGroups.scala

diff --git a/README.md b/README.md
index 514043cc..ec9deb33 100644
--- a/README.md
+++ b/README.md
@@ -25,22 +25,22 @@ associated channels (e.g. GitHub, Discord) to be a safe and friendly environment
 The compatible versions of [Spark](http://spark.apache.org/) and
 [cats](https://github.com/typelevel/cats) are as follows:
 
-| Frameless | Spark                         | Cats     | Cats-Effect | Scala
-|-----------|-------------------------------| -------- | ----------- | ---
-| 0.4.0     | 2.2.0                         | 1.0.0-IF | 0.4         | 2.11
-| 0.4.1     | 2.2.0                         | 1.x      | 0.8         | 2.11
-| 0.5.2     | 2.2.1                         | 1.x      | 0.8         | 2.11
-| 0.6.1     | 2.3.0                         | 1.x      | 0.8         | 2.11
-| 0.7.0     | 2.3.1                         | 1.x      | 1.x         | 2.11
-| 0.8.0     | 2.4.0                         | 1.x      | 1.x         | 2.11 / 2.12
-| 0.9.0     | 3.0.0                         | 1.x      | 1.x         | 2.12
-| 0.10.1    | 3.1.0                         | 2.x      | 2.x         | 2.12
-| 0.11.0*   | 3.2.0 / 3.1.2 / 3.0.1         | 2.x      | 2.x         | 2.12 / 2.13
-| 0.11.1    | 3.2.0 / 3.1.2 / 3.0.1         | 2.x      | 2.x         | 2.12 / 2.13
-| 0.12.0    | 3.2.1 / 3.1.3 / 3.0.3         | 2.x      | 3.x         | 2.12 / 2.13
-| 0.13.0    | 3.3.0 / 3.2.2 / 3.1.3         | 2.x      | 3.x         | 2.12 / 2.13
-| 0.14.0    | 3.3.0 / 3.2.2 / 3.1.3         | 2.x      | 3.x         | 2.12 / 2.13
-| 0.14.1    | 3.3.0 / 3.2.2 / 3.1.3 / 3.4.0 | 2.x      | 3.x         | 2.12 / 2.13
+| Frameless | Spark                 | Cats     | Cats-Effect | Scala       |
+|-----------|-----------------------|----------|-------------|-------------|
+| 0.4.0     | 2.2.0                 | 1.0.0-IF | 0.4         | 2.11        |
+| 0.4.1     | 2.2.0                 | 1.x      | 0.8         | 2.11        |
+| 0.5.2     | 2.2.1                 | 1.x      | 0.8         | 2.11        |
+| 0.6.1     | 2.3.0                 | 1.x      | 0.8         | 2.11        |
+| 0.7.0     | 2.3.1                 | 1.x      | 1.x         | 2.11        |
+| 0.8.0     | 2.4.0                 | 1.x      | 1.x         | 2.11 / 2.12 |
+| 0.9.0     | 3.0.0                 | 1.x      | 1.x         | 2.12        |
+| 0.10.1    | 3.1.0                 | 2.x      | 2.x         | 2.12        |
+| 0.11.0*   | 3.2.0 / 3.1.2 / 3.0.1 | 2.x      | 2.x         | 2.12 / 2.13 |
+| 0.11.1    | 3.2.0 / 3.1.2 / 3.0.1 | 2.x      | 2.x         | 2.12 / 2.13 |
+| 0.12.0    | 3.2.1 / 3.1.3 / 3.0.3 | 2.x      | 3.x         | 2.12 / 2.13 |
+| 0.13.0    | 3.3.0 / 3.2.2 / 3.1.3 | 2.x      | 3.x         | 2.12 / 2.13 |
+| 0.14.0    | 3.3.0 / 3.2.2 / 3.1.3 | 2.x      | 3.x         | 2.12 / 2.13 |
+| 0.14.1    | 3.4.0 / 3.3.0 / 3.2.2 | 2.x      | 3.x         | 2.12 / 2.13 |
 
 _\* 0.11.0 has broken Spark 3.1.2 and 3.0.1 artifacts published._
 
@@ -52,8 +52,8 @@ Starting 0.11 we introduced Spark cross published artifacts:
 
 Artifact names examples:
 
 * `frameless-dataset` (the latest Spark dependency)
+* `frameless-dataset-spark33` (Spark 3.3.x dependency)
 * `frameless-dataset-spark32` (Spark 3.2.x dependency)
-* `frameless-dataset-spark31` (Spark 3.1.x dependency)
 
 Versions 0.5.x and 0.6.x have identical features. The first is compatible with Spark 2.2.1 and the second with 2.3.0.
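Editor's note on the README change above: the cross-published artifacts differ only in their sbt coordinate suffix. A minimal consumer-side sketch, assuming the `org.typelevel` group id frameless publishes under; the version and suffix below are illustrative picks from the compatibility table, not prescribed by this patch:

```scala
// Hypothetical consumer build.sbt: choose the suffix matching your Spark minor
// version; the unsuffixed artifact tracks the newest supported Spark release.
libraryDependencies += "org.typelevel" %% "frameless-dataset-spark33" % "0.14.1" // Spark 3.3.x
// libraryDependencies += "org.typelevel" %% "frameless-dataset" % "0.14.1"      // Spark 3.4.x
```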
diff --git a/build.sbt b/build.sbt
index e185f378..e985245c 100644
--- a/build.sbt
+++ b/build.sbt
@@ -1,7 +1,6 @@
 val sparkVersion = "3.4.0"
-val spark32Version = "3.2.4"
 val spark33Version = "3.3.2"
-val spark31Version = "3.1.3"
+val spark32Version = "3.2.4"
 val catsCoreVersion = "2.9.0"
 val catsEffectVersion = "3.4.10"
 val catsMtlVersion = "1.3.1"
@@ -24,7 +23,7 @@ ThisBuild / githubWorkflowArtifactUpload := false // doesn't work with scoverage
 
 lazy val root = project.in(file("."))
   .enablePlugins(NoPublishPlugin)
-  .aggregate(`root-spark34`, `root-spark33`, `root-spark32`, `root-spark31`, docs)
+  .aggregate(`root-spark34`, `root-spark33`, `root-spark32`, docs)
 
 lazy val `root-spark34` = project
   .in(file(".spark34"))
@@ -41,11 +40,6 @@ lazy val `root-spark32` = project
   .enablePlugins(NoPublishPlugin)
   .aggregate(core, `cats-spark32`, `dataset-spark32`, `refined-spark32`, `ml-spark32`)
 
-lazy val `root-spark31` = project
-  .in(file(".spark31"))
-  .enablePlugins(NoPublishPlugin)
-  .aggregate(core, `cats-spark31`, `dataset-spark31`, `refined-spark31`, `ml-spark31`)
-
 lazy val core = project
   .settings(name := "frameless-core")
   .settings(framelessSettings)
@@ -69,25 +63,17 @@ lazy val `cats-spark32` = project
   .settings(spark32Settings)
   .dependsOn(`dataset-spark32` % "test->test;compile->compile;provided->provided")
 
-lazy val `cats-spark31` = project
-  .settings(name := "frameless-cats-spark31")
-  .settings(sourceDirectory := (cats / sourceDirectory).value)
-  .settings(catsSettings)
-  .settings(spark31Settings)
-  .dependsOn(`dataset-spark31` % "test->test;compile->compile;provided->provided")
-
 lazy val dataset = project
   .settings(name := "frameless-dataset")
-  .settings(Compile / unmanagedSourceDirectories += baseDirectory.value / "src" / "main" / "spark34")
+  .settings(Compile / unmanagedSourceDirectories += baseDirectory.value / "src" / "main" / "spark-3.4+")
   .settings(datasetSettings)
   .settings(sparkDependencies(sparkVersion))
-  .settings(spark32Settings)
   .dependsOn(core % "test->test;compile->compile")
 
 lazy val `dataset-spark33` = project
   .settings(name := "frameless-dataset-spark33")
   .settings(sourceDirectory := (dataset / sourceDirectory).value)
-  .settings(Compile / unmanagedSourceDirectories += (dataset / baseDirectory).value / "src" / "main" / "pre34")
+  .settings(Compile / unmanagedSourceDirectories += (dataset / baseDirectory).value / "src" / "main" / "spark-3")
   .settings(datasetSettings)
   .settings(sparkDependencies(spark33Version))
   .settings(spark33Settings)
@@ -96,21 +82,12 @@ lazy val `dataset-spark33` = project
 lazy val `dataset-spark32` = project
   .settings(name := "frameless-dataset-spark32")
   .settings(sourceDirectory := (dataset / sourceDirectory).value)
-  .settings(Compile / unmanagedSourceDirectories += (dataset / baseDirectory).value / "src" / "main" / "pre34")
+  .settings(Compile / unmanagedSourceDirectories += (dataset / baseDirectory).value / "src" / "main" / "spark-3")
   .settings(datasetSettings)
   .settings(sparkDependencies(spark32Version))
   .settings(spark32Settings)
   .dependsOn(core % "test->test;compile->compile")
 
-lazy val `dataset-spark31` = project
-  .settings(name := "frameless-dataset-spark31")
-  .settings(sourceDirectory := (dataset / sourceDirectory).value)
-  .settings(Compile / unmanagedSourceDirectories += (dataset / baseDirectory).value / "src" / "main" / "pre34")
-  .settings(datasetSettings)
-  .settings(sparkDependencies(spark31Version))
-  .settings(spark31Settings)
-  .dependsOn(core % "test->test;compile->compile")
-
 lazy val refined = project
   .settings(name := "frameless-refined")
   .settings(refinedSettings)
@@ -130,13 +107,6 @@ lazy val `refined-spark32` = project
   .settings(spark32Settings)
   .dependsOn(`dataset-spark32` % "test->test;compile->compile;provided->provided")
 
-lazy val `refined-spark31` = project
-  .settings(name := "frameless-refined-spark31")
-  .settings(sourceDirectory := (refined / sourceDirectory).value)
-  .settings(refinedSettings)
-  .settings(spark31Settings)
-  .dependsOn(`dataset-spark31` % "test->test;compile->compile;provided->provided")
-
 lazy val ml = project
   .settings(name := "frameless-ml")
   .settings(mlSettings)
@@ -168,17 +138,6 @@ lazy val `ml-spark32` = project
     `dataset-spark32` % "test->test;compile->compile;provided->provided"
   )
 
-lazy val `ml-spark31` = project
-  .settings(name := "frameless-ml-spark31")
-  .settings(sourceDirectory := (ml / sourceDirectory).value)
-  .settings(mlSettings)
-  .settings(sparkMlDependencies(spark31Version))
-  .settings(spark31Settings)
-  .dependsOn(
-    core % "test->test;compile->compile",
-    `dataset-spark31` % "test->test;compile->compile;provided->provided"
-  )
-
 lazy val docs = project
   .in(file("mdocs"))
   .settings(framelessSettings)
@@ -245,17 +204,7 @@ lazy val datasetSettings = framelessSettings ++ framelessTypedDatasetREPL ++ Seq
 )
 
 lazy val refinedSettings = framelessSettings ++ framelessTypedDatasetREPL ++ Seq(
-  libraryDependencies += "eu.timepit" %% "refined" % refinedVersion,
-  /**
-   * The old Scala XML is pulled from Scala 2.12.x.
-   *
-   * [error] (update) found version conflict(s) in library dependencies; some are suspected to be binary incompatible:
-   * [error]
-   * [error] * org.scala-lang.modules:scala-xml_2.12:2.1.0 (early-semver) is selected over 1.0.6
-   * [error] +- org.scoverage:scalac-scoverage-reporter_2.12:2.0.7 (depends on 2.1.0)
-   * [error] +- org.scala-lang:scala-compiler:2.12.16 (depends on 1.0.6)
-   */
-  libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always
+  libraryDependencies += "eu.timepit" %% "refined" % refinedVersion
 )
 
 lazy val mlSettings = framelessSettings ++ framelessTypedDatasetREPL
@@ -320,11 +269,18 @@ lazy val framelessSettings = Seq(
   mimaPreviousArtifacts ~= {
     _.filterNot(_.revision == "0.11.0") // didn't release properly
   },
-) ++ consoleSettings
-
-lazy val spark31Settings = Seq(
-  crossScalaVersions := Seq(Scala212)
-)
+
+  /**
+   * The old Scala XML is pulled from Scala 2.12.x.
+   *
+   * [error] (update) found version conflict(s) in library dependencies; some are suspected to be binary incompatible:
+   * [error]
+   * [error] * org.scala-lang.modules:scala-xml_2.12:2.1.0 (early-semver) is selected over 1.0.6
+   * [error] +- org.scoverage:scalac-scoverage-reporter_2.12:2.0.7 (depends on 2.1.0)
+   * [error] +- org.scala-lang:scala-compiler:2.12.16 (depends on 1.0.6)
+   */
+  libraryDependencySchemes += "org.scala-lang.modules" %% "scala-xml" % VersionScheme.Always
+) ++ consoleSettings
 
 lazy val spark32Settings = Seq(
   tlVersionIntroduced := Map("2.12" -> "0.13.0", "2.13" -> "0.13.0")
@@ -401,7 +357,7 @@ ThisBuild / githubWorkflowBuildPreamble ++= Seq(
   )
 )
 
-val roots = List("root-spark31", "root-spark32", "root-spark33", "root-spark34")
+val roots = List("root-spark32", "root-spark33", "root-spark34")
 
 ThisBuild / githubWorkflowBuildMatrixAdditions += "project" -> roots
 ThisBuild / githubWorkflowArtifactDownloadExtraKeys += "project"
diff --git a/dataset/src/main/pre34/frameless/MapGroups.scala b/dataset/src/main/pre34/frameless/MapGroups.scala
deleted file mode 100644
index e72fdb6b..00000000
--- a/dataset/src/main/pre34/frameless/MapGroups.scala
+++ /dev/null
@@ -1,14 +0,0 @@
-package frameless
-
-import org.apache.spark.sql.Encoder
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, MapGroups => SMapGroups}
-
-object MapGroups {
-  def apply[K : Encoder, T : Encoder, U : Encoder](
-    func: (K, Iterator[T]) => TraversableOnce[U],
-    groupingAttributes: Seq[Attribute],
-    dataAttributes: Seq[Attribute],
-    child: LogicalPlan): LogicalPlan =
-    SMapGroups(func, groupingAttributes, dataAttributes, child)
-}
\ No newline at end of file
diff --git a/dataset/src/main/spark-3.4+/frameless/MapGroups.scala b/dataset/src/main/spark-3.4+/frameless/MapGroups.scala
new file mode 100644
index 00000000..6856acba
--- /dev/null
+++ b/dataset/src/main/spark-3.4+/frameless/MapGroups.scala
@@ -0,0 +1,21 @@
+package frameless
+
+import org.apache.spark.sql.Encoder
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, MapGroups => SMapGroups}
+
+object MapGroups {
+  def apply[K: Encoder, T: Encoder, U: Encoder](
+      func: (K, Iterator[T]) => TraversableOnce[U],
+      groupingAttributes: Seq[Attribute],
+      dataAttributes: Seq[Attribute],
+      child: LogicalPlan
+    ): LogicalPlan =
+    SMapGroups(
+      func,
+      groupingAttributes,
+      dataAttributes,
+      Seq(), // #698 - no order given
+      child
+    )
+}
diff --git a/dataset/src/main/spark-3/frameless/MapGroups.scala b/dataset/src/main/spark-3/frameless/MapGroups.scala
new file mode 100644
index 00000000..3fd27f33
--- /dev/null
+++ b/dataset/src/main/spark-3/frameless/MapGroups.scala
@@ -0,0 +1,14 @@
+package frameless
+
+import org.apache.spark.sql.Encoder
+import org.apache.spark.sql.catalyst.expressions.Attribute
+import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, MapGroups => SMapGroups}
+
+object MapGroups {
+  def apply[K: Encoder, T: Encoder, U: Encoder](
+      func: (K, Iterator[T]) => TraversableOnce[U],
+      groupingAttributes: Seq[Attribute],
+      dataAttributes: Seq[Attribute],
+      child: LogicalPlan
+    ): LogicalPlan = SMapGroups(func, groupingAttributes, dataAttributes, child)
+}
diff --git a/dataset/src/main/spark34/frameless/MapGroups.scala b/dataset/src/main/spark34/frameless/MapGroups.scala
deleted file mode 100644
index 3e2814c3..00000000
--- a/dataset/src/main/spark34/frameless/MapGroups.scala
+++ /dev/null
@@ -1,16 +0,0 @@
-package frameless
-
-import org.apache.spark.sql.Encoder
-import org.apache.spark.sql.catalyst.expressions.Attribute
-import org.apache.spark.sql.catalyst.plans.logical.{LogicalPlan, MapGroups => SMapGroups}
-
-object MapGroups {
-  def apply[K : Encoder, T : Encoder, U : Encoder](
-    func: (K, Iterator[T]) => TraversableOnce[U],
-    groupingAttributes: Seq[Attribute],
-    dataAttributes: Seq[Attribute],
-    child: LogicalPlan): LogicalPlan =
-    SMapGroups(func, groupingAttributes, dataAttributes,
-      Seq(), // #698 - possibly enough to provide a default of asc's for all keys?
-      child)
-}
\ No newline at end of file
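Editor's note on the MapGroups shims above: Spark 3.4 extended catalyst's `MapGroups.apply` with an additional ordering argument (a `Seq[SortOrder]`), which is why the `spark-3.4+` copy passes an extra empty `Seq()` (preserving the pre-3.4 "no guaranteed order" behavior, see #698) while the `spark-3` copy forwards the original four arguments unchanged. Both shims expose the same four-parameter `apply`, so call sites in frameless compile unmodified against every supported Spark version. A minimal sketch of a hypothetical call site (the helper name and parameter names below are illustrative, not from this patch):

```scala
import org.apache.spark.sql.Encoder
import org.apache.spark.sql.catalyst.expressions.Attribute
import org.apache.spark.sql.catalyst.plans.logical.LogicalPlan

// Whichever MapGroups shim the build wires in (spark-3 or spark-3.4+)
// satisfies this same shape, so the caller never has to mention the
// Spark-3.4-only ordering parameter.
def flatMapGroupsPlan[K: Encoder, T: Encoder, U: Encoder](
    f: (K, Iterator[T]) => TraversableOnce[U],
    groupingAttrs: Seq[Attribute],
    dataAttrs: Seq[Attribute],
    child: LogicalPlan
  ): LogicalPlan =
  frameless.MapGroups(f, groupingAttrs, dataAttrs, child)
```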