From f54c94c36af64ee9c01190dbcf4d5a049c27db4a Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Jan 2014 13:17:09 -0800
Subject: [PATCH 01/27] make golden answers file a test dependency

---
 build.sbt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/build.sbt b/build.sbt
index afae82f22288c..401fc04fd3cdd 100644
--- a/build.sbt
+++ b/build.sbt
@@ -14,7 +14,7 @@ resolvers += "Local Maven Repository" at "file://"+Path.userHome.absolutePath+"/
 
 libraryDependencies += "org.apache.spark" %% "spark-core" % "0.9.0-incubating-SNAPSHOT"
 
-libraryDependencies += "catalyst" % "hive-golden" % "2" from "http://repository-databricks.forge.cloudbees.com/snapshot/catalystGolden2.jar"
+libraryDependencies += "catalyst" % "hive-golden" % "2" % "test" from "http://repository-databricks.forge.cloudbees.com/snapshot/catalystGolden2.jar"
 
 // Hive 0.10.0 relies on a weird version of jdo that is not published anywhere... Remove when we upgrade to 0.11.0
 libraryDependencies += "javax.jdo" % "jdo2-api" % "2.3-ec" from "http://www.datanucleus.org/downloads/maven2/javax/jdo/jdo2-api/2.3-ec/jdo2-api-2.3-ec.jar"

From eafaeed4d154bd3fec44fb94adf7ccd3eeca7b11 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Jan 2014 13:17:31 -0800
Subject: [PATCH 02/27] add type documentation

---
 src/main/scala/catalyst/execution/SharkInstance.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/scala/catalyst/execution/SharkInstance.scala b/src/main/scala/catalyst/execution/SharkInstance.scala
index dd1ec84d1618f..130578e87fbe2 100644
--- a/src/main/scala/catalyst/execution/SharkInstance.scala
+++ b/src/main/scala/catalyst/execution/SharkInstance.scala
@@ -36,7 +36,7 @@ abstract class SharkInstance extends Logging {
   def metastorePath: String
 
   /** The SharkContext */
-  lazy val sc = createContext()
+  lazy val sc: SharkContext = createContext()
 
   protected def createContext(): SharkContext = {
     SharkEnv.initWithSharkContext("catalyst.execution", master)

From 6f64cee0bd69dac4fbd63c987f47bde1931ecb32 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Jan 2014 13:17:46 -0800
Subject: [PATCH 03/27] don't line wrap string literal

---
 src/main/scala/catalyst/execution/SharkInstance.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/scala/catalyst/execution/SharkInstance.scala b/src/main/scala/catalyst/execution/SharkInstance.scala
index 130578e87fbe2..c7d4eba52751f 100644
--- a/src/main/scala/catalyst/execution/SharkInstance.scala
+++ b/src/main/scala/catalyst/execution/SharkInstance.scala
@@ -45,8 +45,8 @@ abstract class SharkInstance extends Logging {
   /** Sets up the system initially or after a RESET command */
   protected def configure() {
     // TODO: refactor this so we can work with other databases.
-    runSqlHive("set javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=" + metastorePath +
-      ";create=true")
+    runSqlHive(
+      s"set javax.jdo.option.ConnectionURL=jdbc:derby:;databaseName=$metastorePath;create=true")
     runSqlHive("set hive.metastore.warehouse.dir=" + warehousePath)
   }
 

From 9b02b44a38ebf55ff66749c4fc262be7c8c5c655 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Jan 2014 14:33:49 -0800
Subject: [PATCH 04/27] Fix spelling error. Add failFast mode.

---
 Makefile                                      | 55 ++++++++++---------
 .../catalyst/execution/BigDataBenchmark.scala |  2 +-
 ...ionTest.scala => HiveComparisonTest.scala} | 34 +++++++++---
 ...tability.scala => HiveCompatibility.scala} |  2 +-
 .../execution/HiveQueryFileTest.scala         |  2 +-
 .../catalyst/execution/HiveQueryTests.scala   |  2 +-
 .../execution/HiveResolutionSuite.scala       |  2 +-
 .../execution/HiveTypeCoersionSuite.scala     |  2 +-
 8 files changed, 61 insertions(+), 40 deletions(-)
 rename src/test/scala/catalyst/execution/{HiveComparisionTest.scala => HiveComparisonTest.scala} (84%)
 rename src/test/scala/catalyst/execution/{HiveCompatability.scala => HiveCompatibility.scala} (99%)

diff --git a/Makefile b/Makefile
index e763516432300..bbcf31f353131 100644
--- a/Makefile
+++ b/Makefile
@@ -1,54 +1,57 @@
 all: a b c d e f g h i j k l m n o p q r s t u v w x y" z
 
+findBroken:
+	sbt -Dshark.hive.alltests -Dshark.hive.failFast "test-only catalyst.execution.HiveCompatibility"
+
 a:
-	 sbt -Dshark.hive.whitelist=a.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=a.* "test-only catalyst.execution.HiveCompatibility"
 b:
-	 sbt -Dshark.hive.whitelist=b.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=b.* "test-only catalyst.execution.HiveCompatibility"
 c:
-	 sbt -Dshark.hive.whitelist=c.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=c.* "test-only catalyst.execution.HiveCompatibility"
 d:
-	 sbt -Dshark.hive.whitelist=d.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=d.* "test-only catalyst.execution.HiveCompatibility"
 e:
-	 sbt -Dshark.hive.whitelist=e.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=e.* "test-only catalyst.execution.HiveCompatibility"
 f:
-	 sbt -Dshark.hive.whitelist=f.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=f.* "test-only catalyst.execution.HiveCompatibility"
 g:
-	 sbt -Dshark.hive.whitelist=g.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=g.* "test-only catalyst.execution.HiveCompatibility"
 h:
-	 sbt -Dshark.hive.whitelist=h.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=h.* "test-only catalyst.execution.HiveCompatibility"
 i:
-	 sbt -Dshark.hive.whitelist=i.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=i.* "test-only catalyst.execution.HiveCompatibility"
 j:
-	 sbt -Dshark.hive.whitelist=j.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=j.* "test-only catalyst.execution.HiveCompatibility"
 k:
-	 sbt -Dshark.hive.whitelist=k.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=k.* "test-only catalyst.execution.HiveCompatibility"
 l:
-	 sbt -Dshark.hive.whitelist=l.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=l.* "test-only catalyst.execution.HiveCompatibility"
 m:
-	 sbt -Dshark.hive.whitelist=m.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=m.* "test-only catalyst.execution.HiveCompatibility"
 n:
-	 sbt -Dshark.hive.whitelist=n.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=n.* "test-only catalyst.execution.HiveCompatibility"
 o:
-	 sbt -Dshark.hive.whitelist=o.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=o.* "test-only catalyst.execution.HiveCompatibility"
 p:
-	 sbt -Dshark.hive.whitelist=p.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=p.* "test-only catalyst.execution.HiveCompatibility"
 q:
-	 sbt -Dshark.hive.whitelist=q.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=q.* "test-only catalyst.execution.HiveCompatibility"
 r:
-	 sbt -Dshark.hive.whitelist=r.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=r.* "test-only catalyst.execution.HiveCompatibility"
 s:
-	 sbt -Dshark.hive.whitelist=s.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=s.* "test-only catalyst.execution.HiveCompatibility"
 t:
-	 sbt -Dshark.hive.whitelist=t.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=t.* "test-only catalyst.execution.HiveCompatibility"
 u:
-	 sbt -Dshark.hive.whitelist=u.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=u.* "test-only catalyst.execution.HiveCompatibility"
 v:
-	 sbt -Dshark.hive.whitelist=v.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=v.* "test-only catalyst.execution.HiveCompatibility"
 w:
-	 sbt -Dshark.hive.whitelist=w.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=w.* "test-only catalyst.execution.HiveCompatibility"
 x:
-	 sbt -Dshark.hive.whitelist=x.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=x.* "test-only catalyst.execution.HiveCompatibility"
 y:
-	 sbt -Dshark.hive.whitelist=y.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=y.* "test-only catalyst.execution.HiveCompatibility"
 z:
-	 sbt -Dshark.hive.whitelist=z.* "test-only catalyst.execution.HiveCompatability"
+	 sbt -Dshark.hive.whitelist=z.* "test-only catalyst.execution.HiveCompatibility"
diff --git a/src/test/scala/catalyst/execution/BigDataBenchmark.scala b/src/test/scala/catalyst/execution/BigDataBenchmark.scala
index 52146d16f0cfe..1ff9cb6491746 100644
--- a/src/test/scala/catalyst/execution/BigDataBenchmark.scala
+++ b/src/test/scala/catalyst/execution/BigDataBenchmark.scala
@@ -7,7 +7,7 @@ import java.io.File
  * A set of test cases based on the big-data-benchmark.
  * https://amplab.cs.berkeley.edu/benchmark/
  */
-class BigDataBenchmarkTests extends HiveComaparisionTest {
+class BigDataBenchmarkTests extends HiveComparisonTest {
   import TestShark._
 
   val testDataDirectory = new File("target/big-data-benchmark-testdata")
diff --git a/src/test/scala/catalyst/execution/HiveComparisionTest.scala b/src/test/scala/catalyst/execution/HiveComparisonTest.scala
similarity index 84%
rename from src/test/scala/catalyst/execution/HiveComparisionTest.scala
rename to src/test/scala/catalyst/execution/HiveComparisonTest.scala
index bf73e8a153817..996c643125a26 100644
--- a/src/test/scala/catalyst/execution/HiveComparisionTest.scala
+++ b/src/test/scala/catalyst/execution/HiveComparisonTest.scala
@@ -11,22 +11,35 @@ import util._
  * Allows the creations of tests that execute the same query against both hive
  * and catalyst, comparing the results.
  *
- * The "golden" results from Hive are cached in [[answerCache]] to speed up testing.
+ * The "golden" results from Hive are cached in an retrieved both from the classpath and
+ * [[answerCache]] to speed up testing.
  */
-// TODO: correct the mispelled name.
-abstract class HiveComaparisionTest extends FunSuite with BeforeAndAfterAll with GivenWhenThen with Logging {
+abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with GivenWhenThen with Logging {
   protected val targetDir = new File("target")
+
+  /** The local directory with cached golden answer will be stored */
   protected val answerCache = new File(targetDir, "comparison-test-cache")
   if (!answerCache.exists)
     answerCache.mkdir()
 
+  /** The [[ClassLoader]] that contains test dependencies.  Used to look for golden answers. */
+  protected val testClassLoader = this.getClass.getClassLoader
+
+  /** A file where all the test cases that pass are written. Can be used to update the whiteList. */
   val passedFile = new File(targetDir, s"$suiteName.passed")
-  val passedList = new PrintWriter(passedFile)
+  protected val passedList = new PrintWriter(passedFile)
 
   override def afterAll() {
     passedList.close()
   }
 
+  /**
+   * When `-Dshark.hive.failFast` is set the first test to fail will cause all subsequent tests to
+   * also fail.
+   */
+  val failFast = System.getProperty("shark.hive.failFast") != null
+  private var testFailed = false
+
   protected val cacheDigest = java.security.MessageDigest.getInstance("MD5")
   protected def getMd5(str: String): String = {
     val digest = java.security.MessageDigest.getInstance("MD5")
@@ -36,7 +49,8 @@ abstract class HiveComaparisionTest extends FunSuite with BeforeAndAfterAll with
 
   protected def prepareAnswer(sharkQuery: TestShark.type#SharkSqlQuery, answer: Seq[String]): Seq[String] = {
     val orderedAnswer = sharkQuery.parsed match {
-      case _: Command => answer.filterNot(nonDeterministicLine) // Clean out nondeterministic time schema info.
+      // Clean out non-deterministic time schema info.
+      case _: Command => answer.filterNot(nonDeterministicLine)
       case _ =>
         val isOrdered = sharkQuery.executedPlan.collect { case s: Sort => s}.nonEmpty
         // If the query results aren't sorted, then sort them to ensure deterministic answers.
@@ -52,7 +66,7 @@ abstract class HiveComaparisionTest extends FunSuite with BeforeAndAfterAll with
     line.replaceAll("\"lastUpdateTime\":\\d+", "<UPDATETIME>")
 
   /**
-   * Removes non-deterministic paths from [[str]] so cached answers will still pass.
+   * Removes non-deterministic paths from str` so cached answers will still pass.
    */
   protected def cleanPaths(str: String): String = {
     str.replaceAll("file:\\/.*\\/", "<PATH>")
@@ -61,6 +75,8 @@ abstract class HiveComaparisionTest extends FunSuite with BeforeAndAfterAll with
   val installHooksCommand = "(?i)SET.*hooks".r
   def createQueryTest(testCaseName: String, sql: String) = {
     test(testCaseName) {
+      if(failFast && testFailed) sys.error("Failing fast due to previous failure")
+      testFailed = true
       logger.error(
        s"""
           |=============================
@@ -82,11 +98,11 @@ abstract class HiveComaparisionTest extends FunSuite with BeforeAndAfterAll with
         }
 
         val hiveCachedResults = hiveCacheFiles.flatMap { cachedAnswerFile =>
-          logger.debug(s"Looking for cached answer file $cachedAnswerFile.")
+          logger.warn(s"Looking for cached answer file $cachedAnswerFile.")
           if (cachedAnswerFile.exists) {
             Some(fileToString(cachedAnswerFile))
           } else if (getClass.getClassLoader.getResourceAsStream(cachedAnswerFile.toString) != null) {
-            Some(resourceToString(cachedAnswerFile.toString))
+            Some(resourceToString(cachedAnswerFile.toString, classLoader = testClassLoader))
           } else {
             logger.debug(s"File $cachedAnswerFile not found")
             None
@@ -123,6 +139,8 @@ abstract class HiveComaparisionTest extends FunSuite with BeforeAndAfterAll with
             computedResults
           }
 
+        testFailed = false
+
         // Run w/ catalyst
         val catalystResults = queryList.zip(hiveResults).map { case (queryString, hive) =>
           info(queryString)
diff --git a/src/test/scala/catalyst/execution/HiveCompatability.scala b/src/test/scala/catalyst/execution/HiveCompatibility.scala
similarity index 99%
rename from src/test/scala/catalyst/execution/HiveCompatability.scala
rename to src/test/scala/catalyst/execution/HiveCompatibility.scala
index 53eecffce4a52..32daa17156b92 100644
--- a/src/test/scala/catalyst/execution/HiveCompatability.scala
+++ b/src/test/scala/catalyst/execution/HiveCompatibility.scala
@@ -8,7 +8,7 @@ import util._
 /**
  * Runs the test cases that are included in the hive distribution.
  */
-class HiveCompatability extends HiveQueryFileTest {
+class HiveCompatibility extends HiveQueryFileTest {
   // TODO: bundle in jar files... get from classpath
   lazy val hiveQueryDir = new File(TestShark.hiveDevHome, "ql/src/test/queries/clientpositive")
   def testCases = hiveQueryDir.listFiles.map(f => f.getName.stripSuffix(".q") -> f)
diff --git a/src/test/scala/catalyst/execution/HiveQueryFileTest.scala b/src/test/scala/catalyst/execution/HiveQueryFileTest.scala
index afda9f7b5270b..4edcacc1131c3 100644
--- a/src/test/scala/catalyst/execution/HiveQueryFileTest.scala
+++ b/src/test/scala/catalyst/execution/HiveQueryFileTest.scala
@@ -11,7 +11,7 @@ import util._
  * TestSuites that derive from this class must provide a map of testCaseName -> testCaseFiles that should be included.
  * Additionally, there is support for whitelisting and blacklisting tests as development progresses.
  */
-abstract class HiveQueryFileTest extends HiveComaparisionTest {
+abstract class HiveQueryFileTest extends HiveComparisonTest {
   /** A list of tests deemed out of scope and thus completely disregarded */
   def blackList: Seq[String] = Nil
 
diff --git a/src/test/scala/catalyst/execution/HiveQueryTests.scala b/src/test/scala/catalyst/execution/HiveQueryTests.scala
index 5a39c35640684..a694a1d6194c6 100644
--- a/src/test/scala/catalyst/execution/HiveQueryTests.scala
+++ b/src/test/scala/catalyst/execution/HiveQueryTests.scala
@@ -4,7 +4,7 @@ package execution
 /**
  * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
  */
-class HiveQueryTests extends HiveComaparisionTest {
+class HiveQueryTests extends HiveComparisonTest {
   import TestShark._
 
   createQueryTest("Simple Average",
diff --git a/src/test/scala/catalyst/execution/HiveResolutionSuite.scala b/src/test/scala/catalyst/execution/HiveResolutionSuite.scala
index 13dfb951cb55a..4ab6c0dc80a1f 100644
--- a/src/test/scala/catalyst/execution/HiveResolutionSuite.scala
+++ b/src/test/scala/catalyst/execution/HiveResolutionSuite.scala
@@ -4,7 +4,7 @@ package execution
 /**
  * A set of test cases expressed in Hive QL that are not covered by the tests included in the hive distribution.
  */
-class HiveResolutionSuite extends HiveComaparisionTest {
+class HiveResolutionSuite extends HiveComparisonTest {
   import TestShark._
 
   createQueryTest("table.attr",
diff --git a/src/test/scala/catalyst/execution/HiveTypeCoersionSuite.scala b/src/test/scala/catalyst/execution/HiveTypeCoersionSuite.scala
index 095dce23aade9..f94b9951bacab 100644
--- a/src/test/scala/catalyst/execution/HiveTypeCoersionSuite.scala
+++ b/src/test/scala/catalyst/execution/HiveTypeCoersionSuite.scala
@@ -4,7 +4,7 @@ package execution
 /**
  * A set of tests that validate type promotion rules.
  */
-class HiveTypeCoersionSuite extends HiveComaparisionTest {
+class HiveTypeCoersionSuite extends HiveComparisonTest {
   import TestShark._
 
   val baseTypes = Seq("1", "1.0", "1L", "1S", "1Y", "'1'")

From 962761679ba3d5b7533cc8ca6db8c33aba15fb66 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Jan 2014 18:42:40 -0800
Subject: [PATCH 05/27] Use current database as default database.

---
 src/main/scala/catalyst/execution/MetastoreCatalog.scala | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/main/scala/catalyst/execution/MetastoreCatalog.scala b/src/main/scala/catalyst/execution/MetastoreCatalog.scala
index 590bccfe7c8f4..d83242583f541 100644
--- a/src/main/scala/catalyst/execution/MetastoreCatalog.scala
+++ b/src/main/scala/catalyst/execution/MetastoreCatalog.scala
@@ -5,6 +5,7 @@ import org.apache.hadoop.hive.conf.HiveConf
 import org.apache.hadoop.hive.metastore.api.{FieldSchema, Partition, Table, StorageDescriptor, SerDeInfo}
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient
 import org.apache.hadoop.hive.ql.plan.TableDesc
+import org.apache.hadoop.hive.ql.session.SessionState
 import org.apache.hadoop.hive.serde2.AbstractDeserializer
 import org.apache.hadoop.mapred.InputFormat
 
@@ -21,7 +22,7 @@ class HiveMetastoreCatalog(hiveConf: HiveConf) extends Catalog {
 
   def lookupRelation(name: String, alias: Option[String]): BaseRelation = {
     val (databaseName, tableName) = name.split("\\.") match {
-      case Array(tableOnly) => ("default", tableOnly)
+      case Array(tableOnly) => (SessionState.get.getCurrentDatabase(), tableOnly)
       case Array(db, table) => (db, table)
     }
     val table = client.getTable(databaseName, tableName)
@@ -46,7 +47,7 @@ class HiveMetastoreCatalog(hiveConf: HiveConf) extends Catalog {
     def apply(plan: LogicalPlan): LogicalPlan = plan transform {
       case InsertIntoCreatedTable(name, child) =>
         val (databaseName, tableName) = name.split("\\.") match {
-          case Array(tableOnly) => ("default", tableOnly)
+          case Array(tableOnly) => (SessionState.get.getCurrentDatabase(), tableOnly)
           case Array(db, table) => (db, table)
         }
 

From 1aafea35c32ebb242d12e9ff7b24f04100050ecb Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Jan 2014 18:42:59 -0800
Subject: [PATCH 06/27] Configure partition whitelist in TestShark reset.

---
 src/main/scala/catalyst/execution/TestShark.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/main/scala/catalyst/execution/TestShark.scala b/src/main/scala/catalyst/execution/TestShark.scala
index a27be94c107c4..e5cba37be9b1b 100644
--- a/src/main/scala/catalyst/execution/TestShark.scala
+++ b/src/main/scala/catalyst/execution/TestShark.scala
@@ -234,6 +234,8 @@ object TestShark extends SharkInstance {
       // For some reason, RESET does not reset the following variables...
       runSqlHive("set datanucleus.cache.collections=true")
       runSqlHive("set datanucleus.cache.collections.lazy=true")
+      // Lots of tests fail if we do not change the partition whitelist from the default.
+      runSqlHive("set hive.metastore.partition.name.whitelist.pattern=[\\-A-Za-z0-9:_]*")
 
       loadedTables.clear()
       catalog.client.getAllTables("default").foreach { t =>

From ca4ea2636e5666fd0442790b623095247d7e5254 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Jan 2014 18:43:29 -0800
Subject: [PATCH 07/27] Support for parsing UDF(*).

---
 src/main/scala/catalyst/frontend/Hive.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/main/scala/catalyst/frontend/Hive.scala b/src/main/scala/catalyst/frontend/Hive.scala
index 7161690c29cdd..fb6c1d47b78ec 100644
--- a/src/main/scala/catalyst/frontend/Hive.scala
+++ b/src/main/scala/catalyst/frontend/Hive.scala
@@ -655,6 +655,8 @@ object HiveQl {
     /* UDFs - Must be last otherwise will preempt built in functions */
     case Token("TOK_FUNCTION", Token(name, Nil) :: args) =>
       UnresolvedFunction(name, args.map(nodeToExpr))
+    case Token("TOK_FUNCTIONSTAR", Token(name, Nil) :: args) =>
+      UnresolvedFunction(name, Star(None) :: Nil)
 
     /* Literals */
     case Token("TOK_NULL", Nil) => Literal(null, IntegerType) // TODO: What type is null?

From 68aa2e6f6e85d6e06424676de53f00794e64b468 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Jan 2014 18:43:52 -0800
Subject: [PATCH 08/27] Stronger type for Token extractor.

---
 src/main/scala/catalyst/frontend/Hive.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/scala/catalyst/frontend/Hive.scala b/src/main/scala/catalyst/frontend/Hive.scala
index fb6c1d47b78ec..d72b521a3e6b5 100644
--- a/src/main/scala/catalyst/frontend/Hive.scala
+++ b/src/main/scala/catalyst/frontend/Hive.scala
@@ -254,7 +254,7 @@ object HiveQl {
   /** Extractor for matching Hive's AST Tokens. */
   object Token {
     /** @return matches of the form (tokenName, children). */
-    def unapply(t: Any) = t match {
+    def unapply(t: Any): Option[(String, Seq[ASTNode])] = t match {
       case t: ASTNode =>
         Some((t.getText, Option(t.getChildren).map(_.toList).getOrElse(Nil).asInstanceOf[Seq[ASTNode]]))
       case _ => None

From 516481ca197807e91309780c81a719866cef336f Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Jan 2014 18:44:21 -0800
Subject: [PATCH 09/27] Ignore requests to explain native commands.

---
 src/main/scala/catalyst/frontend/Hive.scala | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/src/main/scala/catalyst/frontend/Hive.scala b/src/main/scala/catalyst/frontend/Hive.scala
index d72b521a3e6b5..031bd48739952 100644
--- a/src/main/scala/catalyst/frontend/Hive.scala
+++ b/src/main/scala/catalyst/frontend/Hive.scala
@@ -339,8 +339,8 @@ object HiveQl {
   }
 
   protected def nodeToPlan(node: Node): LogicalPlan = node match {
-    // Just fake explain on create function...
-    case Token("TOK_EXPLAIN", Token("TOK_CREATEFUNCTION", _) :: Nil) => NoRelation
+    // Just fake explain for any of the native commands.
+    case Token("TOK_EXPLAIN", Token(explainType, _) :: Nil) if nativeCommands contains explainType => NoRelation
     case Token("TOK_EXPLAIN", explainArgs) =>
       // Ignore FORMATTED if present.
       val Some(query) :: _ :: _ :: Nil = getClauses(Seq("TOK_QUERY", "FORMATTED", "EXTENDED"), explainArgs)

From 4b6fed8a47e00675171faafac0e582df39f3ff58 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Thu, 9 Jan 2014 18:44:54 -0800
Subject: [PATCH 10/27] support for parsing both DESTINATION and INSERT_INTO.

---
 src/main/scala/catalyst/frontend/Hive.scala | 52 ++++++++++++++-------
 1 file changed, 34 insertions(+), 18 deletions(-)

diff --git a/src/main/scala/catalyst/frontend/Hive.scala b/src/main/scala/catalyst/frontend/Hive.scala
index 031bd48739952..9b2b9997d2d69 100644
--- a/src/main/scala/catalyst/frontend/Hive.scala
+++ b/src/main/scala/catalyst/frontend/Hive.scala
@@ -362,13 +362,15 @@ object HiveQl {
 
       // Return one query for each insert clause.
       val queries = insertClauses.map { case Token("TOK_INSERT", singleInsert) =>
-        val (Some(destClause) ::
+        val (
+            intoClause ::
+            destClause ::
             Some(selectClause) ::
             whereClause ::
             groupByClause ::
             orderByClause ::
             sortByClause ::
-            limitClause :: Nil) = getClauses(Seq("TOK_DESTINATION", "TOK_SELECT", "TOK_WHERE", "TOK_GROUPBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_LIMIT"), singleInsert)
+            limitClause :: Nil) = getClauses(Seq("TOK_INSERT_INTO", "TOK_DESTINATION", "TOK_SELECT", "TOK_WHERE", "TOK_GROUPBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_LIMIT"), singleInsert)
 
         val relations = nodeToRelation(fromClause)
         val withWhere = whereClause.map { whereNode =>
@@ -420,8 +422,13 @@ object HiveQl {
             .map(StopAfter(_, withSort))
             .getOrElse(withSort)
 
+        // There are two tokens for specifying where to sent the result that seem to be used almost
+        // interchangeably.
+        val resultDestination =
+          (intoClause orElse destClause).getOrElse(sys.error("No destination found."))
+
         nodeToDest(
-          destClause,
+          resultDestination,
           withLimit)
       }
 
@@ -441,11 +448,27 @@ object HiveQl {
       Subquery(alias, nodeToPlan(query))
 
     /* Table, No Alias */
-    case Token("TOK_TABREF",
-           Token("TOK_TABNAME",
-             tableNameParts) :: Nil) =>
-      val tableName = tableNameParts.map { case Token(part, Nil) => part }.mkString(".")
-      UnresolvedRelation(tableName, None)
+    case Token("TOK_TABREF", clauses) =>
+      // If the last clause is not a token then it's the alias of the table.
+      val (nonAliasClauses, aliasClause) =
+        if(clauses.last.getText.startsWith("TOK"))
+          (clauses, None)
+        else
+          (clauses.dropRight(1), Some(clauses.last))
+
+      val (Some(tableNameParts) ::
+          sampleClause :: Nil) = getClauses(Seq("TOK_TABNAME", "TOK_TABLESPLITSAMPLE"), nonAliasClauses)
+
+      val tableName = tableNameParts.getChildren.map { case Token(part, Nil) => part }.mkString(".")
+      val alias = aliasClause.map { case Token(a, Nil) => a }
+      val relation = UnresolvedRelation(tableName, alias)
+      // Apply sampling if requested.
+      sampleClause.map {
+        case Token("TOK_TABLESPLITSAMPLE",
+               Token("TOK_ROWCOUNT", Nil) ::
+               Token(count, Nil) :: Nil) =>
+          StopAfter(Literal(count.toInt), relation)
+      }.getOrElse(relation)
 
     case Token("TOK_UNIQUEJOIN", joinArgs) =>
       val tableOrdinals =
@@ -492,14 +515,6 @@ object HiveQl {
       // named output expressions where some aggregate expression has been applied (i.e. First).
       ??? /// Aggregate(groups, Star(None, First(_)) :: Nil, joinedResult)
 
-    /* Table with Alias */
-    case Token("TOK_TABREF",
-           Token("TOK_TABNAME",
-             tableNameParts) ::
-             Token(alias, Nil) :: Nil) =>
-      val tableName = tableNameParts.map { case Token(part, Nil) => part }.mkString(".")
-      UnresolvedRelation(tableName, Some(alias))
-
     case Token(allJoinTokens(joinToken),
            relation1 ::
            relation2 :: other) =>
@@ -529,13 +544,14 @@ object HiveQl {
       throw new NotImplementedError(s"No parse rules for:\n ${dumpTree(a).toString} ")
   }
 
+  val destinationToken = "TOK_DESTINATION|TOK_INSERT_INTO".r
   protected def nodeToDest(node: Node, query: LogicalPlan): LogicalPlan = node match {
-    case Token("TOK_DESTINATION",
+    case Token(destinationToken(),
            Token("TOK_DIR",
              Token("TOK_TMP_FILE", Nil) :: Nil) :: Nil) =>
       query
 
-    case Token("TOK_DESTINATION",
+    case Token(destinationToken(),
            Token("TOK_TAB",
               tableArgs) :: Nil) =>
       val Some(nameClause) :: partitionClause :: Nil =

From 4c5fb0f4d804e8767ced60e30dcb01db9a86117c Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 10 Jan 2014 12:01:40 -0800
Subject: [PATCH 11/27] makefile target for building new whitelist.

---
 Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Makefile b/Makefile
index bbcf31f353131..8c66f1833f588 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,8 @@
 all: a b c d e f g h i j k l m n o p q r s t u v w x y" z
 
+buildWhiteList:
+	sbt -Dshark.hive.alltests "test-only catalyst.execution.HiveCompatibility"
+
 findBroken:
 	sbt -Dshark.hive.alltests -Dshark.hive.failFast "test-only catalyst.execution.HiveCompatibility"
 

From 4c6b454116025439dc6a5c3562aa203d30a1ffe3 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 10 Jan 2014 12:02:31 -0800
Subject: [PATCH 12/27] add option for recomputing the cached golden answer
 when tests fail.

---
 .../execution/HiveComparisonTest.scala        | 23 +++++++++++++++----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/src/test/scala/catalyst/execution/HiveComparisonTest.scala b/src/test/scala/catalyst/execution/HiveComparisonTest.scala
index 996c643125a26..a6a3c3b172f28 100644
--- a/src/test/scala/catalyst/execution/HiveComparisonTest.scala
+++ b/src/test/scala/catalyst/execution/HiveComparisonTest.scala
@@ -13,11 +13,13 @@ import util._
  *
  * The "golden" results from Hive are cached in an retrieved both from the classpath and
  * [[answerCache]] to speed up testing.
+ *
+ * TODO(marmbrus): Document system properties.
  */
 abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with GivenWhenThen with Logging {
   protected val targetDir = new File("target")
 
-  /** The local directory with cached golden answer will be stored */
+  /** The local directory with cached golden answer will be stored. */
   protected val answerCache = new File(targetDir, "comparison-test-cache")
   if (!answerCache.exists)
     answerCache.mkdir()
@@ -40,6 +42,12 @@ abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with G
   val failFast = System.getProperty("shark.hive.failFast") != null
   private var testFailed = false
 
+  /**
+   * Delete any cache files that result in test failures.  Used when the test harness has been
+   * updated thus requiring new golden answers to be computed for some tests.
+   */
+  val recomputeCache = System.getProperty("shark.hive.recomputeCache") != null
+
   protected val cacheDigest = java.security.MessageDigest.getInstance("MD5")
   protected def getMd5(str: String): String = {
     val digest = java.security.MessageDigest.getInstance("MD5")
@@ -66,7 +74,7 @@ abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with G
     line.replaceAll("\"lastUpdateTime\":\\d+", "<UPDATETIME>")
 
   /**
-   * Removes non-deterministic paths from str` so cached answers will still pass.
+   * Removes non-deterministic paths from `str` so cached answers will still pass.
    */
   protected def cleanPaths(str: String): String = {
     str.replaceAll("file:\\/.*\\/", "<PATH>")
@@ -98,7 +106,7 @@ abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with G
         }
 
         val hiveCachedResults = hiveCacheFiles.flatMap { cachedAnswerFile =>
-          logger.warn(s"Looking for cached answer file $cachedAnswerFile.")
+          logger.debug(s"Looking for cached answer file $cachedAnswerFile.")
           if (cachedAnswerFile.exists) {
             Some(fileToString(cachedAnswerFile))
           } else if (getClass.getClassLoader.getResourceAsStream(cachedAnswerFile.toString) != null) {
@@ -173,13 +181,18 @@ abstract class HiveComparisonTest extends FunSuite with BeforeAndAfterAll with G
               val hivePrintOut = s"== HIVE - ${hive.size} row(s) ==" +: preparedHive
               val catalystPrintOut = s"== CATALYST - ${catalyst.size} row(s) ==" +: catalyst
 
-              val resultComparision = sideBySide(hivePrintOut, catalystPrintOut).mkString("\n")
+              val resultComparison = sideBySide(hivePrintOut, catalystPrintOut).mkString("\n")
+
+              if(recomputeCache) {
+                logger.warn(s"Clearing cache files for failed test $testCaseName")
+                hiveCacheFiles.foreach(_.delete())
+              }
 
               fail(
                 s"""
                   |Results do not match for query:
                   |$sharkQuery\n${sharkQuery.analyzed.output.map(_.name).mkString("\t")}
-                  |$resultComparision
+                  |$resultComparison
                 """.stripMargin)
             }
         }

From b01468dfdd3d99c2159534fbff152d9ff46576fb Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 10 Jan 2014 12:03:09 -0800
Subject: [PATCH 13/27] support path rewrites when the query begins with a
 comment.

---
 src/main/scala/catalyst/execution/TestShark.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/scala/catalyst/execution/TestShark.scala b/src/main/scala/catalyst/execution/TestShark.scala
index e5cba37be9b1b..722baa3d8d594 100644
--- a/src/main/scala/catalyst/execution/TestShark.scala
+++ b/src/main/scala/catalyst/execution/TestShark.scala
@@ -85,7 +85,7 @@ object TestShark extends SharkInstance {
    * hive test cases assume the system is set up.
    */
   private def rewritePaths(cmd: String): String =
-    if (cmd.toUpperCase startsWith "LOAD")
+    if (cmd.toUpperCase contains "LOAD DATA")
       cmd.replaceAll("\\.\\.", hiveDevHome.getCanonicalPath)
     else
       cmd

From 8364ec2cc2a34461f15a7e9b19e426d7d9ce79b7 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 10 Jan 2014 12:03:28 -0800
Subject: [PATCH 14/27] whitelist all possible partition values.

---
 src/main/scala/catalyst/execution/TestShark.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/scala/catalyst/execution/TestShark.scala b/src/main/scala/catalyst/execution/TestShark.scala
index 722baa3d8d594..506f43d8e312c 100644
--- a/src/main/scala/catalyst/execution/TestShark.scala
+++ b/src/main/scala/catalyst/execution/TestShark.scala
@@ -235,7 +235,7 @@ object TestShark extends SharkInstance {
       runSqlHive("set datanucleus.cache.collections=true")
       runSqlHive("set datanucleus.cache.collections.lazy=true")
       // Lots of tests fail if we do not change the partition whitelist from the default.
-      runSqlHive("set hive.metastore.partition.name.whitelist.pattern=[\\-A-Za-z0-9:_]*")
+      runSqlHive("set hive.metastore.partition.name.whitelist.pattern=.*")
 
       loadedTables.clear()
       catalog.client.getAllTables("default").foreach { t =>

From 78d730d657d0904b5a3c28b49737594aa40ce9cb Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 10 Jan 2014 12:06:01 -0800
Subject: [PATCH 15/27] Load src test table on RESET.

---
 src/main/scala/catalyst/execution/TestShark.scala | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/src/main/scala/catalyst/execution/TestShark.scala b/src/main/scala/catalyst/execution/TestShark.scala
index 506f43d8e312c..736166326656b 100644
--- a/src/main/scala/catalyst/execution/TestShark.scala
+++ b/src/main/scala/catalyst/execution/TestShark.scala
@@ -263,6 +263,12 @@ object TestShark extends SharkInstance {
       configure()
 
       runSqlHive("USE default")
+
+      // Just loading src makes a lot of tests pass.  This is because some tests do something like
+      // drop an index on src at the beginning.  Since we just pass DDL to Hive, this bypasses our
+      // Analyzer and thus the test table auto-loading mechanism.
+      // Remove after we handle more DDL operations natively.
+      loadTestTable("src")
     } catch {
       case e: Exception =>
         logger.error(s"FATAL ERROR: Failed to reset TestDB state. $e")

From 0d9d56aca705b856191a636e27723c57b74d7d93 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 10 Jan 2014 12:07:20 -0800
Subject: [PATCH 16/27] add more native commands to parser

---
 src/main/scala/catalyst/frontend/Hive.scala | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/main/scala/catalyst/frontend/Hive.scala b/src/main/scala/catalyst/frontend/Hive.scala
index 9b2b9997d2d69..c0a4de505aef9 100644
--- a/src/main/scala/catalyst/frontend/Hive.scala
+++ b/src/main/scala/catalyst/frontend/Hive.scala
@@ -82,6 +82,7 @@ object HiveQl {
     "TOK_ALTERINDEX_REBUILD",
     "TOK_ALTERTABLE_ADDCOLS",
     "TOK_ALTERTABLE_ADDPARTS",
+    "TOK_ALTERTABLE_ALTERPARTS",
     "TOK_ALTERTABLE_ARCHIVE",
     "TOK_ALTERTABLE_CLUSTER_SORT",
     "TOK_ALTERTABLE_DROPPARTS",
@@ -90,6 +91,7 @@ object HiveQl {
     "TOK_ALTERTABLE_RENAME",
     "TOK_ALTERTABLE_RENAMECOL",
     "TOK_ALTERTABLE_REPLACECOLS",
+    "TOK_ALTERTABLE_SKEWED",
     "TOK_ALTERTABLE_TOUCH",
     "TOK_ALTERTABLE_UNARCHIVE",
     "TOK_ANALYZE",

From 4cfc11a6c9dfc16cb216f2a18da4e4dd09bd246e Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 10 Jan 2014 12:07:36 -0800
Subject: [PATCH 17/27] more test coverage.

---
 build.sbt                                     |   2 +-
 .../execution/HiveCompatibility.scala         | 117 +++++++++++++++++-
 2 files changed, 116 insertions(+), 3 deletions(-)

diff --git a/build.sbt b/build.sbt
index 401fc04fd3cdd..e660fb5bbf584 100644
--- a/build.sbt
+++ b/build.sbt
@@ -14,7 +14,7 @@ resolvers += "Local Maven Repository" at "file://"+Path.userHome.absolutePath+"/
 
 libraryDependencies += "org.apache.spark" %% "spark-core" % "0.9.0-incubating-SNAPSHOT"
 
-libraryDependencies += "catalyst" % "hive-golden" % "2" % "test" from "http://repository-databricks.forge.cloudbees.com/snapshot/catalystGolden2.jar"
+libraryDependencies += "catalyst" % "hive-golden" % "3" % "test" from "http://repository-databricks.forge.cloudbees.com/snapshot/catalystGolden3.jar"
 
 // Hive 0.10.0 relies on a weird version of jdo that is not published anywhere... Remove when we upgrade to 0.11.0
 libraryDependencies += "javax.jdo" % "jdo2-api" % "2.3-ec" from "http://www.datanucleus.org/downloads/maven2/javax/jdo/jdo2-api/2.3-ec/jdo2-api-2.3-ec.jar"
diff --git a/src/test/scala/catalyst/execution/HiveCompatibility.scala b/src/test/scala/catalyst/execution/HiveCompatibility.scala
index 32daa17156b92..5c1dcf78ba6fd 100644
--- a/src/test/scala/catalyst/execution/HiveCompatibility.scala
+++ b/src/test/scala/catalyst/execution/HiveCompatibility.scala
@@ -31,6 +31,7 @@ class HiveCompatibility extends HiveQueryFileTest {
     "authorization_5",
     "keyword_1",
     "misc_json",
+    "create_like_tbl_props",
 
     // Timezone specific test answers.
     "udf_unix_timestamp",
@@ -52,6 +53,11 @@ class HiveCompatibility extends HiveQueryFileTest {
     // These tests fail and and exit the JVM.
     "auto_join18_multi_distinct",
     "join18_multi_distinct",
+    "input44",
+    "input42",
+    "input_dfs",
+    "metadata_export_drop",
+    "repair",
 
     // Uses a serde that isn't on the classpath... breaks other tests.
     "bucketizedhiveinputformat",
@@ -64,7 +70,23 @@ class HiveCompatibility extends HiveQueryFileTest {
     "uniquejoin",
 
     // Hive seems to get the wrong answer on some outer joins.  MySQL agrees with catalyst.
-    "auto_join29"
+    "auto_join29",
+
+    // No support for multi-alias i.e. udf as (e1, e2, e3).
+    "allcolref_in_udf",
+
+    // No support for TestSerDe (not published afaik)
+    "alter1",
+    "input16",
+
+    // Shark does not support buckets.
+    ".*bucket.*",
+
+    // No window support yet
+    ".* window.*",
+
+    // Fails in hive with authorization errors.
+    "alter_rename_partition_authorization"
   )
 
   /**
@@ -77,7 +99,10 @@ class HiveCompatibility extends HiveQueryFileTest {
     "add_partition_with_whitelist",
     "alias_casted_column",
     "alter4",
+    "alter_index",
+    "alter_partition_format_loc",
     "alter_partition_with_whitelist",
+    "alter_table_serde",
     "ambiguous_col",
     "authorization_3",
     "authorization_5",
@@ -87,10 +112,18 @@ class HiveCompatibility extends HiveQueryFileTest {
     "auto_join26",
     "auto_join28",
     "auto_join_nulls",
+    "auto_sortmerge_join_1",
     "auto_sortmerge_join_10",
+    "auto_sortmerge_join_11",
+    "auto_sortmerge_join_12",
     "auto_sortmerge_join_15",
+    "auto_sortmerge_join_2",
+    "auto_sortmerge_join_3",
+    "auto_sortmerge_join_4",
     "auto_sortmerge_join_5",
     "auto_sortmerge_join_6",
+    "auto_sortmerge_join_7",
+    "auto_sortmerge_join_8",
     "auto_sortmerge_join_9",
     "binarysortable_1",
     "bucket1",
@@ -106,19 +139,34 @@ class HiveCompatibility extends HiveQueryFileTest {
     "correlationoptimizer6",
     "correlationoptimizer7",
     "count",
+    "create_like2",
+    "create_like_tbl_props",
+    "create_view_translate",
     "ct_case_insensitive",
     "database_properties",
+    "default_partition_name",
     "delimiter",
     "desc_non_existent_tbl",
     "describe_database_json",
     "describe_table_json",
+    "describe_formatted_view_partitioned",
+    "describe_formatted_view_partitioned_json",
+    "describe_pretty",
+    "describe_syntax",
+    "diff_part_input_formats",
     "disable_file_format_check",
     "drop_function",
     "drop_index",
+    "drop_partitions_filter",
+    "drop_partitions_filter2",
+    "drop_partitions_filter3",
+    "drop_partitions_ignore_protection",
     "drop_table",
+    "drop_table2",
     "drop_view",
     "escape_orderby1",
     "escape_sortby1",
+    "filter_join_breaktask",
     "groupby1",
     "groupby1_map",
     "groupby1_map_nomap",
@@ -130,34 +178,65 @@ class HiveCompatibility extends HiveQueryFileTest {
     "groupby5_map",
     "groupby5_map_skew",
     "groupby5_noskew",
+    "groupby7",
+    "groupby7_map",
+    "groupby7_map_multi_single_reducer",
+    "groupby7_map_skew",
+    "groupby7_noskew",
+    "groupby8_map",
+    "groupby8_map_skew",
+    "groupby8_noskew",
     "groupby_multi_single_reducer2",
     "groupby_mutli_insert_common_distinct",
+    "groupby_sort_6",
+    "groupby_sort_8",
     "groupby_sort_test_1",
     "implicit_cast1",
     "index_auto_self_join",
     "index_auto_update",
     "index_stale",
+    "index_auth",
+    "index_auto_file_format",
+    "index_auto_mult_tables",
+    "index_auto_mult_tables_compact",
+    "index_auto_multiple",
+    "index_bitmap_compression",
+    "index_compression",
     "innerjoin",
     "inoutdriver",
+    "input",
     "input0",
     "input11",
     "input11_limit",
     "input1_limit",
     "input22",
+    "input23",
+    "input24",
+    "input25",
+    "input28",
+    "input2_limit",
+    "input41",
     "input4_cb_delim",
     "input4_limit",
     "input6",
     "input7",
     "input8",
     "input9",
+    "input_limit",
+    "input_part1",
+    "input_part2",
     "inputddl4",
+    "inputddl7",
     "inputddl8",
+    "insert_compressed",
     "join0",
     "join1",
     "join10",
     "join11",
     "join12",
     "join13",
+    "join14",
+    "join14_hadoop20",
     "join15",
     "join16",
     "join17",
@@ -170,11 +249,15 @@ class HiveCompatibility extends HiveQueryFileTest {
     "join23",
     "join24",
     "join25",
+    "join26",
     "join27",
+    "join28",
     "join29",
     "join3",
     "join30",
     "join31",
+    "join32",
+    "join33",
     "join34",
     "join35",
     "join36",
@@ -188,6 +271,7 @@ class HiveCompatibility extends HiveQueryFileTest {
     "join6",
     "join7",
     "join8",
+    "join9",
     "join_casesensitive",
     "join_empty",
     "join_hive_626",
@@ -203,8 +287,14 @@ class HiveCompatibility extends HiveQueryFileTest {
     "literal_ints",
     "literal_string",
     "load_file_with_space_in_the_name",
+    "louter_join_ppr",
+    "mapjoin_mapjoin",
+    "mapjoin_subquery",
     "mapjoin_subquery2",
+    "mapjoin_test_outer",
     "mapreduce3",
+    "merge1",
+    "merge2",
     "mergejoins",
     "mergejoins_mixed",
     "misc_json",
@@ -216,12 +306,22 @@ class HiveCompatibility extends HiveQueryFileTest {
     "notable_alias2",
     "nullgroup",
     "nullgroup2",
+    "nullgroup3",
+    "nullgroup5",
     "nullinput",
     "nullinput2",
     "nullscript",
     "optional_outer",
+    "order",
+    "order2",
+    "outer_join_ppr",
+    "part_inherit_tbl_props",
+    "part_inherit_tbl_props_empty",
+    "partition_schema1",
+    "partitions_json",
     "plan_json",
     "ppd1",
+    "ppd_constant_where",
     "ppd_gby",
     "ppd_gby_join",
     "ppd_join",
@@ -237,28 +337,40 @@ class HiveCompatibility extends HiveQueryFileTest {
     "ppd_udf_col",
     "ppd_union",
     "progress_1",
+    "protectmode",
+    "push_or",
     "query_with_semi",
     "quote2",
     "rename_column",
+    "router_join_ppr",
     "select_as_omitted",
+    "select_unquote_and",
+    "select_unquote_not",
+    "select_unquote_or",
+    "serde_reported_schema",
     "set_variable_sub",
     "show_describe_func_quotes",
     "show_functions",
     "skewjoinopt13",
     "skewjoinopt18",
     "skewjoinopt9",
+    "smb_mapjoin_10",
     "smb_mapjoin_13",
     "smb_mapjoin_14",
     "smb_mapjoin_15",
     "smb_mapjoin_16",
     "smb_mapjoin_17",
+    "smb_mapjoin_21",
+    "sort",
     "sort_merge_join_desc_1",
     "sort_merge_join_desc_2",
     "sort_merge_join_desc_3",
     "sort_merge_join_desc_4",
+    "sort_merge_join_desc_5",
+    "sort_merge_join_desc_6",
+    "sort_merge_join_desc_7",
     "subq2",
     "tablename_with_select",
-    "type_cast_1",
     "udf2",
     "udf9",
     "udf_10_trims",
@@ -288,6 +400,7 @@ class HiveCompatibility extends HiveQueryFileTest {
     "udf_datediff",
     "udf_day",
     "udf_dayofmonth",
+    "udf_div",
     "udf_double",
     "udf_exp",
     "udf_field",

From 9ae740a589fb24aaf7635f3533abb570ca877281 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 10 Jan 2014 14:11:09 -0800
Subject: [PATCH 18/27] blacklist more tests that require MR.

---
 src/test/scala/catalyst/execution/HiveCompatibility.scala | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/src/test/scala/catalyst/execution/HiveCompatibility.scala b/src/test/scala/catalyst/execution/HiveCompatibility.scala
index 5c1dcf78ba6fd..ebd10286b4bd5 100644
--- a/src/test/scala/catalyst/execution/HiveCompatibility.scala
+++ b/src/test/scala/catalyst/execution/HiveCompatibility.scala
@@ -42,6 +42,13 @@ class HiveCompatibility extends HiveQueryFileTest {
     "index_auto_self_join",
     "index_stale",
     "type_cast_1",
+    "index_compression",
+    "index_bitmap_compression",
+    "index_auto_multiple",
+    "index_auto_mult_tables_compact",
+    "index_auto_mult_tables",
+    "index_auto_file_format",
+    "index_auth",
 
     // Hive seems to think 1.0 > NaN = true && 1.0 < NaN = false... which is wrong.
     // http://stackoverflow.com/a/1573715

From 755b2292e980e285d5f7e5bdd4b66f65aa8ee211 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 10 Jan 2014 14:35:07 -0800
Subject: [PATCH 19/27] blacklist some ddl tests.

---
 src/test/scala/catalyst/execution/HiveCompatibility.scala | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/test/scala/catalyst/execution/HiveCompatibility.scala b/src/test/scala/catalyst/execution/HiveCompatibility.scala
index ebd10286b4bd5..fd1595e86d102 100644
--- a/src/test/scala/catalyst/execution/HiveCompatibility.scala
+++ b/src/test/scala/catalyst/execution/HiveCompatibility.scala
@@ -33,6 +33,11 @@ class HiveCompatibility extends HiveQueryFileTest {
     "misc_json",
     "create_like_tbl_props",
 
+    // Weird DDL differences result in failures on jenkins.
+    "create_like2",
+    "create_view_translate",
+    "partitions_json",
+
     // Timezone specific test answers.
     "udf_unix_timestamp",
     "udf_to_unix_timestamp",

From e9f45889903455fc2e8f298af4f8aefec40d10d8 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Fri, 10 Jan 2014 14:55:17 -0800
Subject: [PATCH 20/27] fix > 100 char.

---
 src/test/scala/catalyst/execution/HiveCompatibility.scala | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/src/test/scala/catalyst/execution/HiveCompatibility.scala b/src/test/scala/catalyst/execution/HiveCompatibility.scala
index fd1595e86d102..f2eeebd6c886a 100644
--- a/src/test/scala/catalyst/execution/HiveCompatibility.scala
+++ b/src/test/scala/catalyst/execution/HiveCompatibility.scala
@@ -15,7 +15,8 @@ class HiveCompatibility extends HiveQueryFileTest {
 
   /** A list of tests deemed out of scope currently and thus completely disregarded */
   override def blackList = Seq(
-    "hook_order", // These tests use hooks that are not on the classpath and thus break all subsequent SQL execution.
+    // These tests use hooks that are not on the classpath and thus break all subsequent execution.
+    "hook_order",
     "hook_context",
     "mapjoin_hook",
     "multi_sahooks",

From ef7b9435dfa3a17ac66e8f54a6fe0d648610c07e Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Sat, 11 Jan 2014 14:26:22 -0800
Subject: [PATCH 21/27] add metastore support for float

---
 src/main/scala/catalyst/execution/MetastoreCatalog.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/main/scala/catalyst/execution/MetastoreCatalog.scala b/src/main/scala/catalyst/execution/MetastoreCatalog.scala
index d83242583f541..12b06996bd66e 100644
--- a/src/main/scala/catalyst/execution/MetastoreCatalog.scala
+++ b/src/main/scala/catalyst/execution/MetastoreCatalog.scala
@@ -82,6 +82,7 @@ object HiveMetatoreTypes {
   def toDataType(metastoreType: String): DataType =
     metastoreType match {
       case "string" => StringType
+      case "float" => FloatType
       case "int" => IntegerType
       case "double" => DoubleType
       case "bigint" => LongType

From f0faa264b84e670e0139bd1496aef895b6f705ee Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Sat, 11 Jan 2014 14:31:02 -0800
Subject: [PATCH 22/27] add sample and distinct operators.

---
 .../scala/catalyst/plans/logical/basicOperators.scala  | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/src/main/scala/catalyst/plans/logical/basicOperators.scala b/src/main/scala/catalyst/plans/logical/basicOperators.scala
index 224e44d857051..0e34e7629a16a 100644
--- a/src/main/scala/catalyst/plans/logical/basicOperators.scala
+++ b/src/main/scala/catalyst/plans/logical/basicOperators.scala
@@ -68,6 +68,16 @@ case class Subquery(alias: String, child: LogicalPlan) extends UnaryNode {
   def references = Set.empty
 }
 
+case class Sample(percentage: Double, child: LogicalPlan) extends UnaryNode {
+  def output = child.output
+  def references = Set.empty
+}
+
+case class Distinct(child: LogicalPlan) extends UnaryNode {
+  def output = child.output
+  def references = child.outputSet
+}
+
 case object NoRelation extends LeafNode {
   def output = Nil
 }

From f58d5a56ca93367658a4c34930c49225c7323952 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Sat, 11 Jan 2014 14:27:02 -0800
Subject: [PATCH 23/27] support for parsing SELECT DISTINCT

---
 src/main/scala/catalyst/frontend/Hive.scala | 21 +++++++++++++++------
 1 file changed, 15 insertions(+), 6 deletions(-)

diff --git a/src/main/scala/catalyst/frontend/Hive.scala b/src/main/scala/catalyst/frontend/Hive.scala
index c0a4de505aef9..0692c83543c1d 100644
--- a/src/main/scala/catalyst/frontend/Hive.scala
+++ b/src/main/scala/catalyst/frontend/Hive.scala
@@ -367,12 +367,13 @@ object HiveQl {
         val (
             intoClause ::
             destClause ::
-            Some(selectClause) ::
+            selectClause ::
+            selectDistinctClause ::
             whereClause ::
             groupByClause ::
             orderByClause ::
             sortByClause ::
-            limitClause :: Nil) = getClauses(Seq("TOK_INSERT_INTO", "TOK_DESTINATION", "TOK_SELECT", "TOK_WHERE", "TOK_GROUPBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_LIMIT"), singleInsert)
+            limitClause :: Nil) = getClauses(Seq("TOK_INSERT_INTO", "TOK_DESTINATION", "TOK_SELECT", "TOK_SELECTDI", "TOK_WHERE", "TOK_GROUPBY", "TOK_ORDERBY", "TOK_SORTBY", "TOK_LIMIT"), singleInsert)
 
         val relations = nodeToRelation(fromClause)
         val withWhere = whereClause.map { whereNode =>
@@ -380,10 +381,12 @@ object HiveQl {
           Filter(nodeToExpr(whereExpr), relations)
         }.getOrElse(relations)
 
+        val select =
+          (selectClause orElse selectDistinctClause).getOrElse(sys.error("No select clause."))
 
         // Script transformations are expressed as a select clause with a single expression of type
         // TOK_TRANSFORM
-        val transformation = selectClause.getChildren.head match {
+        val transformation = select.getChildren.head match {
           case Token("TOK_SELEXPR",
                  Token("TOK_TRANSFORM",
                    Token("TOK_EXPLIST", inputExprs) ::
@@ -404,7 +407,7 @@ object HiveQl {
         // a script transformation.
         val withProject = transformation.getOrElse {
           // Not a transformation so must be either project or aggregation.
-          val selectExpressions = nameExpressions(selectClause.getChildren.flatMap(selExprNodeToExpr))
+          val selectExpressions = nameExpressions(select.getChildren.flatMap(selExprNodeToExpr))
 
           groupByClause match {
             case Some(groupBy) => Aggregate(groupBy.getChildren.map(nodeToExpr), selectExpressions, withWhere)
@@ -412,13 +415,19 @@ object HiveQl {
           }
         }
 
+        val withDistinct =
+          if(selectDistinctClause.isDefined)
+            Distinct(withProject)
+          else
+            withProject
+
         require(!(orderByClause.isDefined && sortByClause.isDefined), "Can't have both a sort by and order by.")
         // Right now we treat sorting and ordering as identical.
         val withSort =
           (orderByClause orElse sortByClause)
             .map(_.getChildren.map(nodeToSortOrder))
-            .map(Sort(_, withProject))
-            .getOrElse(withProject)
+            .map(Sort(_, withDistinct))
+            .getOrElse(withDistinct)
         val withLimit =
           limitClause.map(l => nodeToExpr(l.getChildren.head))
             .map(StopAfter(_, withSort))

From a92919d8ee1c1d3ce10082d0023b28dd63efd98b Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Sat, 11 Jan 2014 14:27:15 -0800
Subject: [PATCH 24/27] add alter view as to native commands

---
 src/main/scala/catalyst/frontend/Hive.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/main/scala/catalyst/frontend/Hive.scala b/src/main/scala/catalyst/frontend/Hive.scala
index 0692c83543c1d..1f203288543f2 100644
--- a/src/main/scala/catalyst/frontend/Hive.scala
+++ b/src/main/scala/catalyst/frontend/Hive.scala
@@ -105,6 +105,7 @@ object HiveQl {
 
     // TODO(marmbrus): Figure out how view are expanded by hive, as we might need to handle this.
     "TOK_ALTERVIEW_ADDPARTS",
+    "TOK_ALTERVIEW_AS",
     "TOK_ALTERVIEW_DROPPARTS",
     "TOK_ALTERVIEW_PROPERTIES",
     "TOK_ALTERVIEW_RENAME",

From 0e975eafb5c8e1a1b9fd1c5155beb51716f2668e Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Sat, 11 Jan 2014 14:29:18 -0800
Subject: [PATCH 25/27] parse bucket sampling as percentage sampling

---
 src/main/scala/catalyst/frontend/Hive.scala | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/src/main/scala/catalyst/frontend/Hive.scala b/src/main/scala/catalyst/frontend/Hive.scala
index 1f203288543f2..e094a921bbe40 100644
--- a/src/main/scala/catalyst/frontend/Hive.scala
+++ b/src/main/scala/catalyst/frontend/Hive.scala
@@ -459,7 +459,7 @@ object HiveQl {
            query :: Token(alias, Nil) :: Nil) =>
       Subquery(alias, nodeToPlan(query))
 
-    /* Table, No Alias */
+    /* All relations, possibly with aliases or sampling clauses. */
     case Token("TOK_TABREF", clauses) =>
       // If the last clause is not a token then it's the alias of the table.
       val (nonAliasClauses, aliasClause) =
@@ -469,17 +469,23 @@ object HiveQl {
           (clauses.dropRight(1), Some(clauses.last))
 
       val (Some(tableNameParts) ::
-          sampleClause :: Nil) = getClauses(Seq("TOK_TABNAME", "TOK_TABLESPLITSAMPLE"), nonAliasClauses)
+          splitSampleClause ::
+          bucketSampleClause :: Nil) = getClauses(Seq("TOK_TABNAME", "TOK_TABLESPLITSAMPLE", "TOK_TABLEBUCKETSAMPLE"), nonAliasClauses)
 
-      val tableName = tableNameParts.getChildren.map { case Token(part, Nil) => part }.mkString(".")
-      val alias = aliasClause.map { case Token(a, Nil) => a }
+      val tableName = tableNameParts.getChildren.map { case Token(part, Nil) => cleanIdentifier(part) }.mkString(".")
+      val alias = aliasClause.map { case Token(a, Nil) => cleanIdentifier(a) }
       val relation = UnresolvedRelation(tableName, alias)
+
       // Apply sampling if requested.
-      sampleClause.map {
+      (bucketSampleClause orElse splitSampleClause).map {
         case Token("TOK_TABLESPLITSAMPLE",
                Token("TOK_ROWCOUNT", Nil) ::
                Token(count, Nil) :: Nil) =>
           StopAfter(Literal(count.toInt), relation)
+        case Token("TOK_TABLEBUCKETSAMPLE",
+               Token(numerator, Nil) ::
+               Token(denominator, Nil) :: Nil) =>
+          Sample(numerator.toDouble / denominator.toDouble, relation)
       }.getOrElse(relation)
 
     case Token("TOK_UNIQUEJOIN", joinArgs) =>

From c5842d274355ebe859e5cf4cd4c61ca0e4e409af Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Sat, 11 Jan 2014 14:30:44 -0800
Subject: [PATCH 26/27] don't throw an error when a select clause outputs
 multiple copies of the same attribute.

---
 src/main/scala/catalyst/plans/logical/LogicalPlan.scala | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/main/scala/catalyst/plans/logical/LogicalPlan.scala b/src/main/scala/catalyst/plans/logical/LogicalPlan.scala
index 27b96b5219085..c3eaecde3b221 100644
--- a/src/main/scala/catalyst/plans/logical/LogicalPlan.scala
+++ b/src/main/scala/catalyst/plans/logical/LogicalPlan.scala
@@ -41,7 +41,7 @@ abstract class LogicalPlan extends QueryPlan[LogicalPlan] {
       option.name == remainingParts.head
     }
 
-    options match {
+    options.distinct match {
       case a :: Nil => Some(a) // One match, use it.
       case Nil => None         // No matches.
       case ambiguousReferences =>

From 86355a65bf5036dbf05b0c9d0c75d7dc188d09c8 Mon Sep 17 00:00:00 2001
From: Michael Armbrust <michael@databricks.com>
Date: Sat, 11 Jan 2014 14:31:19 -0800
Subject: [PATCH 27/27] throw error if there are unexpected join clauses.

---
 src/main/scala/catalyst/frontend/Hive.scala | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/main/scala/catalyst/frontend/Hive.scala b/src/main/scala/catalyst/frontend/Hive.scala
index e094a921bbe40..ddd0af49a97f6 100644
--- a/src/main/scala/catalyst/frontend/Hive.scala
+++ b/src/main/scala/catalyst/frontend/Hive.scala
@@ -543,6 +543,7 @@ object HiveQl {
         case "TOK_LEFTOUTERJOIN" => LeftOuter
         case "TOK_FULLOUTERJOIN" => FullOuter
       }
+      assert(other.size <= 1, "Unhandled join clauses.")
       Join(nodeToRelation(relation1),
         nodeToRelation(relation2),
         joinType,