Skip to content

Commit

Permalink
CARMEL-3537 add ut for spark-26859 (#30)
Browse files Browse the repository at this point in the history
  • Loading branch information
hongyzhang authored and mingmwang committed Aug 19, 2020
1 parent 129e30d commit b1fc906
Showing 1 changed file with 33 additions and 0 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -365,6 +365,39 @@ abstract class OrcSuite extends OrcTest with BeforeAndAfterAll {
}
}

test("SPARK-26859 Fix field writer index bug in non-vectorized ORC deserializer") {
  // Turn the vectorized ORC reader off so the read below goes through the
  // non-vectorized deserializer named in the test title.
  val readerConf = SQLConf.ORC_VECTORIZED_READER_ENABLED.key -> "false"

  withSQLConf(readerConf) {
    withTempPath { tmpDir =>
      val orcFormat = "orc"

      // Two sibling directories under one root; the expected rows below carry
      // the matching "one" / "two" value as their last field.
      val rootPath = tmpDir.getCanonicalPath
      val partOnePath = s"$rootPath${File.separator}part=one"
      val partTwoPath = s"$rootPath${File.separator}part=two"

      // Three-column data set: col1, col2, col3.
      val narrowDf = Seq((1, 2, "abc"), (4, 5, "def"), (8, 9, null))
        .toDF("col1", "col2", "col3")

      // Four-column data set: an extra `col4` sits between col1 and col2, so the
      // column positions differ from the files written from `narrowDf`.
      val wideDf = Seq((10, null, 20, null), (40, "uvw", 50, "xyz"), (80, null, 90, null))
        .toDF("col1", "col4", "col2", "col3")

      narrowDf.write.format(orcFormat).save(partOnePath)
      wideDf.write.format(orcFormat).save(partTwoPath)

      // Load both directories at once, forcing the wider four-column schema
      // onto every file.
      val combined = spark.read
        .schema(wideDf.schema)
        .format(orcFormat)
        .load(rootPath)

      // Rows coming from the three-column files must surface `col4` as null
      // while col2 / col3 keep their own values.
      val expectedRows = Seq(
        Row(1, null, 2, "abc", "one"),
        Row(4, null, 5, "def", "one"),
        Row(8, null, 9, null, "one"),
        Row(10, null, 20, null, "two"),
        Row(40, "uvw", 50, "xyz", "two"),
        Row(80, null, 90, null, "two"))

      checkAnswer(combined, expectedRows)
    }
  }
}

test("Write Spark version into ORC file metadata") {
withTempPath { path =>
spark.range(1).repartition(1).write.orc(path.getCanonicalPath)
Expand Down

0 comments on commit b1fc906

Please sign in to comment.