Skip to content

Commit

Permalink
[SPARK-25406][SQL] For ParquetSchemaPruningSuite.scala, move calls to…
Browse files Browse the repository at this point in the history
… `withSQLConf` inside calls to `test`

(Link to Jira: https://issues.apache.org/jira/browse/SPARK-25406)

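The current use of `withSQLConf` in `ParquetSchemaPruningSuite.scala` is incorrect: it wraps the calls to `test`, so the configuration is only in effect while the test cases are being registered, not while their bodies run. As a result, the desired configuration settings are not being set when running the test cases.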

This PR fixes that defective usage and addresses the test failures that were previously masked by that defect.

I added code to the relevant test cases to print the SQL configuration settings in effect and found that they were not being set as expected. When I moved the calls to `withSQLConf` inside the calls to `test`, I found that the configuration settings were being set as expected.

Closes apache#22394 from mallman/spark-25406-fix_broken_schema_pruning_tests.

Authored-by: Michael Allman <msa@allman.ms>
Signed-off-by: DB Tsai <d_tsai@apple.com>
(cherry picked from commit a7e5aa6)
Signed-off-by: DB Tsai <d_tsai@apple.com>

Ref: LIHADOOP-48531

RB=1857521
A=
  • Loading branch information
mallman authored and otterc committed Oct 28, 2019
1 parent 9971a8d commit e538116
Showing 1 changed file with 38 additions and 25 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -218,20 +218,24 @@ class ParquetSchemaPruningSuite
}

private def testSchemaPruning(testName: String)(testThunk: => Unit) {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
test(s"Spark vectorized reader - without partition data column - $testName") {
test(s"Spark vectorized reader - without partition data column - $testName") {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
withContacts(testThunk)
}
test(s"Spark vectorized reader - with partition data column - $testName") {
}
test(s"Spark vectorized reader - with partition data column - $testName") {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true") {
withContactsWithDataPartitionColumn(testThunk)
}
}

withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
test(s"Parquet-mr reader - without partition data column - $testName") {
test(s"Parquet-mr reader - without partition data column - $testName") {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
withContacts(testThunk)
}
test(s"Parquet-mr reader - with partition data column - $testName") {
}
test(s"Parquet-mr reader - with partition data column - $testName") {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false") {
withContactsWithDataPartitionColumn(testThunk)
}
}
Expand Down Expand Up @@ -271,7 +275,7 @@ class ParquetSchemaPruningSuite
MixedCase(1, "r1c1", MixedCaseColumn("123", 2)) ::
Nil

testMixedCasePruning("select with exact column names") {
testExactCaseQueryPruning("select with exact column names") {
val query = sql("select CoL1, coL2.B from mixedcase")
checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
checkAnswer(query.orderBy("id"),
Expand All @@ -280,7 +284,7 @@ class ParquetSchemaPruningSuite
Nil)
}

testMixedCasePruning("select with lowercase column names") {
testMixedCaseQueryPruning("select with lowercase column names") {
val query = sql("select col1, col2.b from mixedcase")
checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
checkAnswer(query.orderBy("id"),
Expand All @@ -289,7 +293,7 @@ class ParquetSchemaPruningSuite
Nil)
}

testMixedCasePruning("select with different-case column names") {
testMixedCaseQueryPruning("select with different-case column names") {
val query = sql("select cOL1, cOl2.b from mixedcase")
checkScan(query, "struct<CoL1:string,coL2:struct<B:int>>")
checkAnswer(query.orderBy("id"),
Expand All @@ -298,34 +302,43 @@ class ParquetSchemaPruningSuite
Nil)
}

testMixedCasePruning("filter with different-case column names") {
testMixedCaseQueryPruning("filter with different-case column names") {
val query = sql("select id from mixedcase where Col2.b = 2")
checkScan(query, "struct<id:int,coL2:struct<B:int>>")
checkAnswer(query.orderBy("id"), Row(1) :: Nil)
}

private def testMixedCasePruning(testName: String)(testThunk: => Unit) {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
SQLConf.CASE_SENSITIVE.key -> "true") {
test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") {
withMixedCaseData(testThunk)
// Tests schema pruning for a query whose column and field names are exactly the same as the table
// schema's column and field names. N.B. this implies that `testThunk` should pass using either a
// case-sensitive or case-insensitive query parser
private def testExactCaseQueryPruning(testName: String)(testThunk: => Unit) {
test(s"Spark vectorized reader - case-sensitive parser - mixed-case schema - $testName") {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
SQLConf.CASE_SENSITIVE.key -> "true") {
withMixedCaseData(testThunk)
}
}
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
SQLConf.CASE_SENSITIVE.key -> "false") {
test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") {
test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
SQLConf.CASE_SENSITIVE.key -> "true") {
withMixedCaseData(testThunk)
}
}
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
SQLConf.CASE_SENSITIVE.key -> "false") {
test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") {
withMixedCaseData(testThunk)
testMixedCaseQueryPruning(testName)(testThunk)
}

// Tests schema pruning for a query whose column and field names may differ in case from the table
// schema's column and field names
private def testMixedCaseQueryPruning(testName: String)(testThunk: => Unit) {
test(s"Spark vectorized reader - case-insensitive parser - mixed-case schema - $testName") {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "true",
SQLConf.CASE_SENSITIVE.key -> "false") {
withMixedCaseData(testThunk)
}
}
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
SQLConf.CASE_SENSITIVE.key -> "true") {
test(s"Parquet-mr reader - case-sensitive parser - mixed-case schema - $testName") {
test(s"Parquet-mr reader - case-insensitive parser - mixed-case schema - $testName") {
withSQLConf(SQLConf.PARQUET_VECTORIZED_READER_ENABLED.key -> "false",
SQLConf.CASE_SENSITIVE.key -> "false") {
withMixedCaseData(testThunk)
}
}
Expand Down

0 comments on commit e538116

Please sign in to comment.