This repository has been archived by the owner on Dec 20, 2018. It is now read-only.

Close #169 and add support for DataSet of Avro records #217

Status: Open. Wants to merge 27 commits into base: master.

Commits (27)
39bc512
Change version to 3.1.1-SNAPSHOT
JoshRosen Nov 28, 2016
62c3c53
Remove Spark 1.x documentation from README; add links to older READMEs
JoshRosen Nov 28, 2016
29ef5f6
WIP - allow creation of Dataset from RDD[SpecificRecord]
Jan 26, 2017
fd777c6
Fix imports so that we can redeclare LambdaVariable, make unit tests …
Feb 2, 2017
e4c7a42
Update SBT for Spark-version-dependent code
Feb 3, 2017
f661f61
Keep spark version at 2.0.0
Feb 3, 2017
cdba653
Ensures all unit tests pass on Windows
Feb 6, 2017
f9ef636
Unit tests pass with different versions, but build.sbt is messy and i…
Feb 7, 2017
5c3c116
Add support for Spark 2.1.x (while retaining Spark 2.0.x support)
JoshRosen Feb 7, 2017
762f7a1
Update README in preparation for 3.2.0 release
JoshRosen Feb 7, 2017
e28e456
Setting version to 3.2.0
liancheng Feb 8, 2017
1bfe421
Setting version to 3.2.1-SNAPSHOT
liancheng Feb 8, 2017
51eb883
3.1.0 -> 3.2.0 in README
JoshRosen Feb 8, 2017
69a0570
Build is now working and including the classfiles in the final artifact
Feb 9, 2017
ce47906
build.sbt is looking better!
Feb 10, 2017
0934452
Much tighter way to include the spark version-specific modules into t…
Feb 10, 2017
3ee8ccd
Merge upstream (3.2.1)
Feb 10, 2017
73fc4d8
Update create table to avoid deprecation warnings
Feb 10, 2017
0e99d72
Make unit test for GenericData.Record pass
Feb 15, 2017
3a1fd20
Make tests pass with ENUM and FIXED
Feb 15, 2017
d5c0329
add support for DateType
nihed Feb 16, 2017
5de3574
Merge master
Apr 18, 2017
909edcd
Revert "Merge master"
Apr 18, 2017
d9df990
Fix bad merge and merge branch-3.2 properly
Apr 18, 2017
e15c5bd
Merge master
Oct 27, 2017
8300332
Merge master
Oct 30, 2017
7869c11
Flakey tests?
Oct 30, 2017
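
The commit messages above (notably "WIP - allow creation of Dataset from RDD[SpecificRecord]" and the later GenericData.Record, ENUM, and FIXED fixes) outline the user-facing goal: turning an RDD of Avro records into a typed Dataset. Roughly, usage could look like the following (a hypothetical sketch; the concrete entry point added by the PR is not visible in this excerpt, and AvroEncoder / MyAvroRecord are illustrative names only):

```scala
// Hypothetical usage sketch only. MyAvroRecord stands in for a class generated
// from an Avro schema (a SpecificRecord), and AvroEncoder.of is an assumed entry
// point for obtaining an Encoder for it; neither name is confirmed by this diff.
import org.apache.spark.sql.{Dataset, Encoder, SparkSession}

val spark = SparkSession.builder().appName("avro-dataset-demo").getOrCreate()

implicit val avroEncoder: Encoder[MyAvroRecord] = AvroEncoder.of[MyAvroRecord]

val records = spark.sparkContext.parallelize(Seq.empty[MyAvroRecord])
val ds: Dataset[MyAvroRecord] = spark.createDataset(records)
```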
.travis.yml (28 changes: 25 additions, 3 deletions)
@@ -11,17 +11,39 @@ before_cache:
  - find $HOME/.sbt -name "*.lock" -delete
matrix:
  include:
    # ---- Spark 2.0.x ----------------------------------------------------------------------------
    # Spark 2.0.0, Scala 2.11, and Avro 1.7.x
    - jdk: openjdk7
      scala: 2.11.7
      env: TEST_HADOOP_VERSION="2.2.0" TEST_SPARK_VERSION="2.0.0" TEST_AVRO_VERSION="1.7.6" TEST_AVRO_MAPRED_VERSION="1.7.7"
    # Spark 2.0.0, Scala 2.11, and Avro 1.8.x
    - jdk: openjdk7
      scala: 2.11.7
      env: TEST_HADOOP_VERSION="2.2.0" TEST_SPARK_VERSION="2.0.0" TEST_AVRO_VERSION="1.8.0" TEST_AVRO_MAPRED_VERSION="1.8.0"
    # Spark 2.0.0, Scala 2.10, and Avro 1.7.x
    - jdk: openjdk7
      scala: 2.10.4
      env: TEST_HADOOP_VERSION="2.2.0" TEST_SPARK_VERSION="2.0.0" TEST_AVRO_VERSION="1.7.6" TEST_AVRO_MAPRED_VERSION="1.7.7"
    # Spark 2.0.0, Scala 2.10, and Avro 1.8.x
    - jdk: openjdk7
      scala: 2.10.4
      env: TEST_HADOOP_VERSION="2.2.0" TEST_SPARK_VERSION="2.0.0" TEST_AVRO_VERSION="1.8.0" TEST_AVRO_MAPRED_VERSION="1.8.0"
    # ---- Spark 2.1.x ----------------------------------------------------------------------------
    # Spark 2.1.0, Scala 2.11, and Avro 1.7.x
    - jdk: openjdk7
-     scala: 2.11.8
+     scala: 2.11.7
      env: TEST_HADOOP_VERSION="2.2.0" TEST_SPARK_VERSION="2.1.0" TEST_AVRO_VERSION="1.7.6" TEST_AVRO_MAPRED_VERSION="1.7.7"
    # Spark 2.1.0, Scala 2.11, and Avro 1.8.x
    - jdk: openjdk7
      scala: 2.11.7
      env: TEST_HADOOP_VERSION="2.2.0" TEST_SPARK_VERSION="2.1.0" TEST_AVRO_VERSION="1.8.0" TEST_AVRO_MAPRED_VERSION="1.8.0"
    # Spark 2.1.0, Scala 2.10, and Avro 1.7.x
    - jdk: openjdk7
-     scala: 2.10.6
+     scala: 2.10.4
      env: TEST_HADOOP_VERSION="2.2.0" TEST_SPARK_VERSION="2.1.0" TEST_AVRO_VERSION="1.7.6" TEST_AVRO_MAPRED_VERSION="1.7.7"
    # Spark 2.1.0, Scala 2.10, and Avro 1.8.x
    - jdk: openjdk7
-     scala: 2.10.6
+     scala: 2.10.4
      env: TEST_HADOOP_VERSION="2.2.0" TEST_SPARK_VERSION="2.1.0" TEST_AVRO_VERSION="1.8.0" TEST_AVRO_MAPRED_VERSION="1.8.0"
    # Spark 2.2.0, Scala 2.11, and Avro 1.7.x
    - jdk: openjdk8
build.sbt (38 changes: 32 additions, 6 deletions)
@@ -1,14 +1,17 @@
-name := "spark-avro"

-organization := "com.databricks"
+lazy val commonSettings = Seq(
+  organization := "com.databricks",
+  scalaVersion := "2.11.7",
+  crossScalaVersions := Seq("2.10.5", "2.11.7")
+)

-scalaVersion := "2.11.8"
+commonSettings

-crossScalaVersions := Seq("2.10.6", "2.11.8")
+name := "spark-avro"

spName := "databricks/spark-avro"

-sparkVersion := "2.1.0"
+sparkVersion := "2.0.0"

val testSparkVersion = settingKey[String]("The version of Spark to test against.")

@@ -107,7 +110,7 @@ pomExtra :=

bintrayReleaseOnPublish in ThisBuild := false

-import ReleaseTransformations._
+import sbtrelease.ReleasePlugin.autoImport.ReleaseTransformations._

// Add publishing to spark packages as another step.
releaseProcess := Seq[ReleaseStep](
@@ -123,3 +126,26 @@ releaseProcess := Seq[ReleaseStep](
  pushChanges,
  releaseStepTask(spPublish)
)


+lazy val spark21xProj = project.in(file("spark-2.1.x")).settings(
+  commonSettings,
+  libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.1.0" % "provided"
+).disablePlugins(SparkPackagePlugin)

+lazy val spark20xProj = project.in(file("spark-2.0.x")).settings(
+  commonSettings,
+  libraryDependencies += "org.apache.spark" %% "spark-sql" % "2.0.0" % "provided"
+).disablePlugins(SparkPackagePlugin)

+unmanagedClasspath in Test ++= {
+  (exportedProducts in (spark20xProj, Runtime)).value ++
+    (exportedProducts in (spark21xProj, Runtime)).value
+}

+products in (Compile, packageBin) ++= Seq(
+  (classDirectory in (spark20xProj, Compile)).value,
+  (classDirectory in (spark21xProj, Compile)).value
+)
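
Taken together, these build.sbt changes split the Spark-version-specific glue into two subprojects (spark-2.0.x and spark-2.1.x), each compiled against its own provided spark-sql dependency, and then fold both subprojects' class files into the published spark-avro artifact. The two output-writer factories in the files below are the classes that live in those shims; presumably the core module selects the right one at runtime based on the running Spark version. A minimal sketch of what that runtime dispatch could look like (an assumption; the selection code itself is not part of the diff shown here):

```scala
// Hypothetical sketch, not taken from this PR: choosing the version-specific
// factory at runtime, which is only possible because build.sbt packages both
// shims' class files into the one artifact.
import org.apache.spark.SPARK_VERSION

def outputWriterFactoryClassName: String =
  if (SPARK_VERSION.startsWith("2.0.")) {
    "com.databricks.spark.avro.Spark20AvroOutputWriterFactory"
  } else {
    "com.databricks.spark.avro.Spark21AvroOutputWriterFactory"
  }

// The chosen class would then be instantiated reflectively, mirroring the
// Class.forName pattern the factories themselves use below.
```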
Spark20AvroOutputWriterFactory (new file)
@@ -0,0 +1,52 @@
/*
* Copyright 2014 Databricks
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.databricks.spark.avro

import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.{TaskAttemptContext, TaskAttemptID}
import org.apache.spark.sql.execution.datasources.{OutputWriter, OutputWriterFactory}
import org.apache.spark.sql.types.StructType

private[avro] class Spark20AvroOutputWriterFactory(
    schema: StructType,
    recordName: String,
    recordNamespace: String) extends OutputWriterFactory {

  def doGetDefaultWorkFile(path: String, context: TaskAttemptContext, extension: String): Path = {
    val uniqueWriteJobId = context.getConfiguration.get("spark.sql.sources.writeJobUUID")
    val taskAttemptId: TaskAttemptID = context.getTaskAttemptID
    val split = taskAttemptId.getTaskID.getId
    new Path(path, f"part-r-$split%05d-$uniqueWriteJobId$extension")
  }

  def newInstance(
      path: String,
      bucketId: Option[Int],
      dataSchema: StructType,
      context: TaskAttemptContext): OutputWriter = {

    val ot = Class.forName("com.databricks.spark.avro.AvroOutputWriter")
    val meth = ot.getDeclaredConstructor(
      classOf[String], classOf[TaskAttemptContext], classOf[StructType],
      classOf[String], classOf[String],
      classOf[Function3[String, TaskAttemptContext, String, Path]]
    )
    meth.setAccessible(true)
    meth.newInstance(path, context, schema, recordName, recordNamespace, doGetDefaultWorkFile _)
      .asInstanceOf[OutputWriter]
  }
}
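
Both factories construct the actual AvroOutputWriter reflectively via Class.forName rather than with a direct `new`, presumably so these thin Spark-version shims do not need a compile-time dependency on the core module that defines the writer. The extra Function3 constructor argument lets each shim inject its own rule for where the part file goes. The writer's own source is not part of this view; the sketch below shows only the constructor shape the reflective lookup appears to assume (the "Sketch" name and the body are illustrative assumptions):

```scala
// Sketch only: the constructor signature the getDeclaredConstructor call above
// appears to target. The real com.databricks.spark.avro.AvroOutputWriter extends
// Spark's OutputWriter and contains the Avro record-writing logic; this stub
// exists purely to illustrate the parameter list.
package com.databricks.spark.avro

import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.spark.sql.types.StructType

private[avro] class AvroOutputWriterSketch(
    path: String,
    context: TaskAttemptContext,
    schema: StructType,
    recordName: String,
    recordNamespace: String,
    getDefaultWorkFile: (String, TaskAttemptContext, String) => Path) {

  // Each Spark-version shim decides how the final file path is built.
  private val workFile: Path = getDefaultWorkFile(path, context, ".avro")
}
```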
Spark21AvroOutputWriterFactory (previously AvroOutputWriterFactory)
@@ -16,24 +16,35 @@

package com.databricks.spark.avro

+import org.apache.hadoop.fs.Path
import org.apache.hadoop.mapreduce.TaskAttemptContext

import org.apache.spark.sql.execution.datasources.{OutputWriter, OutputWriterFactory}
import org.apache.spark.sql.types.StructType

-private[avro] class AvroOutputWriterFactory(
+private[avro] class Spark21AvroOutputWriterFactory(
    schema: StructType,
    recordName: String,
    recordNamespace: String) extends OutputWriterFactory {

-  override def getFileExtension(context: TaskAttemptContext): String = {
-    ".avro"
+  def doGetDefaultWorkFile(path: String, context: TaskAttemptContext, extension: String): Path = {
+    new Path(path)
  }

-  override def newInstance(
-      path: String,
-      dataSchema: StructType,
-      context: TaskAttemptContext): OutputWriter = {
-    new AvroOutputWriter(path, context, schema, recordName, recordNamespace)
+  def newInstance(
+      path: String,
+      dataSchema: StructType,
+      context: TaskAttemptContext): OutputWriter = {
+
+    val ot = Class.forName("com.databricks.spark.avro.AvroOutputWriter")
+    val meth = ot.getDeclaredConstructor(
+      classOf[String], classOf[TaskAttemptContext], classOf[StructType],
+      classOf[String], classOf[String],
+      classOf[Function3[String, TaskAttemptContext, String, Path]]
+    )
+    meth.setAccessible(true)
+    meth.newInstance(path, context, schema, recordName, recordNamespace, doGetDefaultWorkFile _)
+      .asInstanceOf[OutputWriter]
  }

+  override def getFileExtension(context: TaskAttemptContext): String = ".avro"
}
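
The two shims differ because the OutputWriterFactory contract changed between Spark 2.0.x and 2.1.x: as far as I can tell, 2.0.x passes newInstance a bucket id plus an output directory (so the writer has to compose the part-file name itself, hence the writeJobUUID lookup in the 2.0 shim), while 2.1.x drops the bucket id, hands newInstance the full file path, and asks the factory for the file extension separately. The traits below paraphrase that difference; treat them as an approximation rather than the exact Spark API:

```scala
// Approximate shapes of the Spark 2.0.x and 2.1.x OutputWriterFactory contracts
// that the two shims above adapt between (paraphrased, not copied from Spark).
import org.apache.hadoop.mapreduce.TaskAttemptContext
import org.apache.spark.sql.execution.datasources.OutputWriter
import org.apache.spark.sql.types.StructType

// Spark 2.0.x style: a bucket id is passed in, and `path` is a directory.
trait OutputWriterFactory20Like {
  def newInstance(
      path: String,
      bucketId: Option[Int],
      dataSchema: StructType,
      context: TaskAttemptContext): OutputWriter
}

// Spark 2.1.x style: no bucket id, `path` already names the file, and the
// extension is reported separately via getFileExtension.
trait OutputWriterFactory21Like {
  def getFileExtension(context: TaskAttemptContext): String
  def newInstance(
      path: String,
      dataSchema: StructType,
      context: TaskAttemptContext): OutputWriter
}
```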