Skip to content

Commit

Permalink
[SPARK-3329][SQL] Don't depend on Hive SET pair ordering in tests.
Browse files Browse the repository at this point in the history
This fixes some possible spurious test failures in `HiveQuerySuite` by comparing sets of key-value pairs as sets, rather than as lists.

Author: William Benton <willb@redhat.com>
Author: Aaron Davidson <aaron@databricks.com>

Closes #2220 from willb/spark-3329 and squashes the following commits:

3b3e205 [William Benton] Collapse collectResults case match in HiveQuerySuite
6525d8e [William Benton] Handle cases where SET returns Rows of (single) strings
cf11b0e [Aaron Davidson] Fix flakey HiveQuerySuite test
  • Loading branch information
willb authored and marmbrus committed Sep 9, 2014
1 parent dc1dbf2 commit 2b7ab81
Showing 1 changed file with 26 additions and 21 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -558,62 +558,67 @@ class HiveQuerySuite extends HiveComparisonTest {
val testKey = "spark.sql.key.usedfortestonly"
val testVal = "test.val.0"
val nonexistentKey = "nonexistent"

val KV = "([^=]+)=([^=]*)".r
def collectResults(rdd: SchemaRDD): Set[(String, String)] =
rdd.collect().map {
case Row(key: String, value: String) => key -> value
case Row(KV(key, value)) => key -> value
}.toSet
clear()

// "set" itself returns all config variables currently specified in SQLConf.
// TODO: Should we be listing the default here always? probably...
assert(sql("SET").collect().size == 0)

assertResult(Array(s"$testKey=$testVal")) {
sql(s"SET $testKey=$testVal").collect().map(_.getString(0))
assertResult(Set(testKey -> testVal)) {
collectResults(hql(s"SET $testKey=$testVal"))
}

assert(hiveconf.get(testKey, "") == testVal)
assertResult(Array(s"$testKey=$testVal")) {
sql(s"SET $testKey=$testVal").collect().map(_.getString(0))
assertResult(Set(testKey -> testVal)) {
collectResults(hql("SET"))
}

sql(s"SET ${testKey + testKey}=${testVal + testVal}")
assert(hiveconf.get(testKey + testKey, "") == testVal + testVal)
assertResult(Array(s"$testKey=$testVal", s"${testKey + testKey}=${testVal + testVal}")) {
sql(s"SET").collect().map(_.getString(0))
assertResult(Set(testKey -> testVal, (testKey + testKey) -> (testVal + testVal))) {
collectResults(hql("SET"))
}

// "set key"
assertResult(Array(s"$testKey=$testVal")) {
sql(s"SET $testKey").collect().map(_.getString(0))
assertResult(Set(testKey -> testVal)) {
collectResults(hql(s"SET $testKey"))
}

assertResult(Array(s"$nonexistentKey=<undefined>")) {
sql(s"SET $nonexistentKey").collect().map(_.getString(0))
assertResult(Set(nonexistentKey -> "<undefined>")) {
collectResults(hql(s"SET $nonexistentKey"))
}

// Assert that sql() should have the same effects as sql() by repeating the above using sql().
clear()
assert(sql("SET").collect().size == 0)

assertResult(Array(s"$testKey=$testVal")) {
sql(s"SET $testKey=$testVal").collect().map(_.getString(0))
assertResult(Set(testKey -> testVal)) {
collectResults(sql(s"SET $testKey=$testVal"))
}

assert(hiveconf.get(testKey, "") == testVal)
assertResult(Array(s"$testKey=$testVal")) {
sql("SET").collect().map(_.getString(0))
assertResult(Set(testKey -> testVal)) {
collectResults(sql("SET"))
}

sql(s"SET ${testKey + testKey}=${testVal + testVal}")
assert(hiveconf.get(testKey + testKey, "") == testVal + testVal)
assertResult(Array(s"$testKey=$testVal", s"${testKey + testKey}=${testVal + testVal}")) {
sql("SET").collect().map(_.getString(0))
assertResult(Set(testKey -> testVal, (testKey + testKey) -> (testVal + testVal))) {
collectResults(sql("SET"))
}

assertResult(Array(s"$testKey=$testVal")) {
sql(s"SET $testKey").collect().map(_.getString(0))
assertResult(Set(testKey -> testVal)) {
collectResults(sql(s"SET $testKey"))
}

assertResult(Array(s"$nonexistentKey=<undefined>")) {
sql(s"SET $nonexistentKey").collect().map(_.getString(0))
assertResult(Set(nonexistentKey -> "<undefined>")) {
collectResults(sql(s"SET $nonexistentKey"))
}

clear()
Expand Down

0 comments on commit 2b7ab81

Please sign in to comment.